diff --git a/python/main.py b/python/main.py index 257efa7b7dbf75ce1f590e72a1838b46f7287c0e..f04ca77f78ee3d1f031a032a34673ed68c8cd923 100644 --- a/python/main.py +++ b/python/main.py @@ -1,9 +1,11 @@ import os import glob +import math from math import sqrt import torch import torch.nn as nn +import torch.nn.init as init import torch.backends.cudnn as cudnn import torchvision import torchvision.transforms as transforms @@ -43,15 +45,50 @@ LEARNING_RATE = 0.001 # DEAFULT IS 0.001 ################################## # Here we consistently use RGB color space and LEARNING_RATE = 0.001 # -# Max pooling: (73% goalnet) +# Max pooling (default): (73% goalnet) # Avg pooling: (59% goalnet) # Max + Avg + Max (72% goalnet) # Avg + Max + Avg (62 % goalnet) -# # Max pooling, 8x8,4x4,2x2: (73% goalnet) # Max pooling, 16x16,9x9,2x2 (68% goalnet) # Max pooling, 24x24,4x4,2x2 (65% goalnet) +################################## +## CUSTOM INITIALIZATION ## +################################## +# As before, SPACE = "RGB" and LEARNING_RATE = 0.001. +# Biases are initialized as well +# +# Kaiming uniform (default): (73% goalnet) +# Kaiming normal: (71% goalnet) +# Normal (m=0, std=1): (10% goalnet) +# Uniform (a=0, b= 1): (20% goalnet) +# Ones: (10% goalnet) +# Zeros (10% goalnet) + + +################################## +## IMAGE SIZE ## +################################## +# Default settings as usual. +# The resized image is fed to the first convolution layer, +# and then downsampled to (32x32) by the following pooling layer. +# +# (32x32) (default): (73% goalnet) +# (64x64): (73% goalnet) +# (16x16): (65% goalnet) + +################################## +## Learning rate ## +################################## +# Default settings. 
+# +# lr = 0.001 (default): (73% goalnet) +# lr = 0.0001: (70% goalnet) +# lr = 0.00001: (50% goalnet) +# lr = 0.01: (49% goalnet) +# lr = 0.1: (10% goalnet) + ################################## ## PREPARE DATA ## ################################## @@ -59,11 +96,13 @@ LEARNING_RATE = 0.001 # DEAFULT IS 0.001 CHANNELS = 1 if SPACE == "L" else 3 MEAN = MEAN_MAP[SPACE] STD = MEAN_MAP[SPACE] +IMG_SCALE = 1 def to_color_space(img): return img.convert(SPACE) transform_comp = transforms.Compose([ + transforms.Resize((int(IMG_SCALE*32),int(IMG_SCALE*32))), to_color_space, transforms.ToTensor(), transforms.Normalize(MEAN, STD), @@ -92,13 +131,30 @@ testloader = torch.utils.data.DataLoader(testset, ## PREPARE NETWORK ## ################################## +def init_weights(m): + if type(m) in {nn.Conv2d, nn.Linear}: + + init.uniform_(m.weight) + init.uniform_(m.bias) + + #Below is the DEFAULT initialization: + # + #init.kaiming_uniform_(m.weight, a=math.sqrt(5)) + # + #fan_in, _ = init._calculate_fan_in_and_fan_out(m.weight) + #bound = 1 / math.sqrt(fan_in) + #init.uniform_(m.bias, -bound, bound) + use_cuda = torch.cuda.is_available() classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') #model = cvlNet() -model = goalNet(CHANNELS) +model = goalNet(CHANNELS, IMG_SCALE) + +#initialize weights with function specified in 'init_weights' +#model.apply(init_weights) if use_cuda: model.cuda() @@ -108,6 +164,8 @@ objective = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) + + ################################## ## TRAIN NETWORK ## ################################## diff --git a/python/models/goalNet.py b/python/models/goalNet.py index eabe9782593c68bb3fcea15e4ff3d810246cc08c..23c9c54318a4324203b4b4026445285a9ccfe5f9 100644 --- a/python/models/goalNet.py +++ b/python/models/goalNet.py @@ -5,7 +5,7 @@ import torch.nn.functional as F class goalNet(nn.Module): - def __init__(self, channels): + def __init__(self, 
channels, img_scale): super(goalNet, self).__init__() @@ -15,15 +15,18 @@ class goalNet(nn.Module): self.seq1 = nn.Sequential( # Input rgb channels, output 32 channels (32 parallel computations), kernel size 5x5 nn.Conv2d(channels, 32, 5, padding=2), + # outputs 16x16xc feature map. Stride 's' downscales by a factor 's' #nn.MaxPool2d(kernel_size=24, padding=11, stride=2), #nn.MaxPool2d(kernel_size=16, padding=7, stride=2), - nn.MaxPool2d(kernel_size=8, padding=4, stride=2), + nn.MaxPool2d(kernel_size=8, padding=4, stride= int(2 * img_scale)), + #nn.AvgPool2d(kernel_size=8, padding=4, stride=2), nn.ReLU()) self.seq2 = nn.Sequential( nn.Conv2d(32, 32, 5, padding=2), nn.ReLU(), + #nn.MaxPool2d(kernel_size=4, padding=1, stride=2) #nn.MaxPool2d(kernel_size=9, padding=0, stride=1) nn.MaxPool2d(kernel_size=4, padding=2, stride=2) @@ -38,6 +41,7 @@ class goalNet(nn.Module): # Fully connected since expects a 4x4xc feature map self.fc1 = nn.Conv2d(64, 64, 4) self.relu4 = nn.ReLU() + # One prediction layer based on extracted features self.prediction = nn.Linear(64, 10) self.loss = nn.LogSoftmax(1)