Rolf Sievert / visual-object-recognition / Commits / 02fad5b4

Commit 02fad5b4, authored 5 years ago by mehfo331
Commit message: "uppgift 6" (assignment 6)
Parent: bf7ba060
Changes: 2 changed files, with 67 additions and 5 deletions

    python/main.py             61 additions, 3 deletions
    python/models/goalNet.py    6 additions, 2 deletions
python/main.py  +61 −3
 import os
 import glob
+import math
 from math import sqrt
 import torch
 import torch.nn as nn
+import torch.nn.init as init
 import torch.backends.cudnn as cudnn
 import torchvision
 import torchvision.transforms as transforms
...
@@ -43,15 +45,50 @@ LEARNING_RATE = 0.001 # DEAFULT IS 0.001
 ##################################
 # Here we consistently use RGB color space and LEARNING_RATE = 0.001
 #
-# Max pooling: (73% goalnet)
+# Max pooling (default): (73% goalnet)
 # Avg pooling: (59% goalnet)
 # Max + Avg + Max (72% goalnet)
 # Avg + Max + Avg (62 % goalnet)
 #
 # Max pooling, 8x8,4x4,2x2: (73% goalnet)
 # Max pooling, 16x16,9x9,2x2 (68% goalnet)
 # Max pooling, 24x24,4x4,2x2 (65% goalnet)
 ##################################
+## CUSTOM INITIALIZATION ##
+##################################
+# As before, SPACE = "RGB" and LEARNING_RATE = 0.001.
+# Biases are initialized as well
+#
+# Kaiming uniform (default): (73% goalnet)
+# Kaiming normal: (71% goalnet)
+# Normal (m=0, std=1): (10% goalnet)
+# Uniform (a=0, b=1): (20% goalnet)
+# Ones: (10% goalnet)
+# Zeros: (10% goalnet)
+##################################
+## IMAGE SIZE ##
+##################################
+# Default settings as usual.
+# The resized image is fed to the first convolution layer,
+# and then downsampled to (32x32) by the following pooling layer.
+#
+# (32x32) (default): (73% goalnet)
+# (64x64): (73% goalnet)
+# (16x16): (65% goalnet)
+##################################
+## Learning rate ##
+##################################
+# Default settings.
+#
+# lr = 0.001 (default): (73% goalnet)
+# lr = 0.0001: (70% goalnet)
+# lr = 0.00001: (50% goalnet)
+# lr = 0.01: (49% goalnet)
+# lr = 0.1: (10% goalnet)
+##################################
 ##################################
 ## PREPARE DATA ##
 ##################################
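The CUSTOM INITIALIZATION results above come from re-initializing every Conv2d and Linear layer before training and comparing test accuracy. As a rough sketch of how such a sweep can be wired up with torch.nn.init and nn.Module.apply (the helper name make_init_fn, the bias handling, and the stand-in model below are illustrative, not taken from this commit):

import math
import torch.nn as nn
import torch.nn.init as init

def make_init_fn(scheme):
    """Return a function for model.apply() that re-initializes Conv2d/Linear layers."""
    def init_fn(m):
        if type(m) in {nn.Conv2d, nn.Linear}:
            if scheme == "kaiming_uniform":      # PyTorch's own default for these layers
                init.kaiming_uniform_(m.weight, a=math.sqrt(5))
            elif scheme == "kaiming_normal":
                init.kaiming_normal_(m.weight)
            elif scheme == "normal":             # N(0, 1)
                init.normal_(m.weight, mean=0.0, std=1.0)
            elif scheme == "uniform":            # U(0, 1)
                init.uniform_(m.weight, a=0.0, b=1.0)
            elif scheme == "ones":
                init.ones_(m.weight)
            elif scheme == "zeros":
                init.zeros_(m.weight)
            init.zeros_(m.bias)                  # bias handling here is a placeholder
    return init_fn

# Hypothetical usage with a tiny stand-in model (not goalNet):
model = nn.Sequential(nn.Conv2d(3, 32, 5, padding=2), nn.ReLU(),
                      nn.Flatten(), nn.Linear(32 * 32 * 32, 10))
model.apply(make_init_fn("kaiming_normal"))

The "kaiming_uniform" branch mirrors PyTorch's default initialization for Conv2d/Linear, which lines up with the "(default)" label in the results above.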
...
@@ -59,11 +96,13 @@ LEARNING_RATE = 0.001 # DEAFULT IS 0.001
 CHANNELS = 1 if SPACE == "L" else 3
 MEAN = MEAN_MAP[SPACE]
 STD = MEAN_MAP[SPACE]
+IMG_SCALE = 1

 def to_color_space(img):
     return img.convert(SPACE)

 transform_comp = transforms.Compose([
+    transforms.Resize((int(IMG_SCALE * 32), int(IMG_SCALE * 32))),
     to_color_space,
     transforms.ToTensor(),
     transforms.Normalize(MEAN, STD),
...
@@ -92,13 +131,30 @@ testloader = torch.utils.data.DataLoader(testset,
 ## PREPARE NETWORK ##
 ##################################

+def init_weights(m):
+    if type(m) in {nn.Conv2d, nn.Linear}:
+        init.uniform_(m.weight)
+        init.uniform_(m.bias)
+        #Below is the DEFAULT initialization:
+        #
+        #init.kaiming_uniform_(m.weight, a=math.sqrt(5))
+        #
+        #fan_in, _ = init._calculate_fan_in_and_fan_out(m.weight)
+        #bound = 1 / math.sqrt(fan_in)
+        #init.uniform_(m.bias, -bound, bound)

 use_cuda = torch.cuda.is_available()

 classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
            'ship', 'truck')

 #model = cvlNet()
-model = goalNet(CHANNELS)
+model = goalNet(CHANNELS, IMG_SCALE)
+
+#initialize weights with function specified in 'init_weights'
+#model.apply(init_weights)

 if use_cuda:
     model.cuda()
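The commented-out model.apply(init_weights) would walk every submodule recursively, which is why init_weights filters on the module type before touching weights. A self-contained sketch of that traversal on a stand-in model (not goalNet):

import torch.nn as nn
import torch.nn.init as init

def init_weights(m):
    # Re-initialize only convolution and fully connected layers;
    # apply() also visits containers like nn.Sequential, which are skipped by the check.
    if type(m) in {nn.Conv2d, nn.Linear}:
        init.uniform_(m.weight)
        init.uniform_(m.bias)
        print(f"re-initialized {m.__class__.__name__}")

# Stand-in model built from the same kinds of layers goalNet uses.
model = nn.Sequential(
    nn.Conv2d(3, 32, 5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=8, padding=4, stride=2),
    nn.Flatten(),
    nn.Linear(32, 10),
)
model.apply(init_weights)   # prints once per Conv2d/Linear, nothing for ReLU/MaxPool2d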
...
@@ -108,6 +164,8 @@ objective = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
 ##################################
 ## TRAIN NETWORK ##
 ##################################
...
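The training loop itself is collapsed in this diff view. For orientation only, a self-contained sketch of a single Adam + CrossEntropyLoss optimization step in the same style; the stand-in model and the dummy batch are assumptions, not the collapsed code:

import torch
import torch.nn as nn

# Stand-in pieces so this snippet runs on its own; in main.py the real model,
# objective, optimizer and data loader are defined as shown above.
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
objective = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
use_cuda = torch.cuda.is_available()
if use_cuda:
    model.cuda()

# One dummy batch standing in for (inputs, labels) from a DataLoader.
inputs = torch.randn(8, 3, 32, 32)
labels = torch.randint(0, 10, (8,))
if use_cuda:
    inputs, labels = inputs.cuda(), labels.cuda()

optimizer.zero_grad()             # clear gradients from the previous step
outputs = model(inputs)           # forward pass
loss = objective(outputs, labels) # cross-entropy objective
loss.backward()                   # backpropagate
optimizer.step()                  # Adam update with the configured learning rate
print(loss.item())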
python/models/goalNet.py  +6 −2
...
@@ -5,7 +5,7 @@ import torch.nn.functional as F
 class goalNet(nn.Module):
-    def __init__(self, channels):
+    def __init__(self, channels, img_scale):
         super(goalNet, self).__init__()
...
@@ -15,15 +15,18 @@ class goalNet(nn.Module):
         self.seq1 = nn.Sequential(
             # Input rgb channels, output 32 channels (32 parallel computations), kernel size 5x5
             nn.Conv2d(channels, 32, 5, padding=2),
             # outputs 16x16xc featurmap. Stride 's' downscales by a factor 's'
             #nn.MaxPool2d(kernel_size=24, padding=11, stride=2),
             #nn.MaxPool2d(kernel_size=16, padding=7, stride=2),
-            nn.MaxPool2d(kernel_size=8, padding=4, stride=2),
+            nn.MaxPool2d(kernel_size=8, padding=4, stride=int(2 * img_scale)),
             #nn.AvgPool2d(kernel_size=8, padding=4, stride=2),
             nn.ReLU())
         self.seq2 = nn.Sequential(
             nn.Conv2d(32, 32, 5, padding=2), nn.ReLU(),
             #nn.MaxPool2d(kernel_size=4, padding=1, stride=2)
             #nn.MaxPool2d(kernel_size=9, padding=0, stride=1)
             nn.MaxPool2d(kernel_size=4, padding=2, stride=2)
...
@@ -38,6 +41,7 @@ class goalNet(nn.Module):
         # Fully connected since expects a 4x4xc feature map
         self.fc1 = nn.Conv2d(64, 64, 4)
         self.relu4 = nn.ReLU()
         # One prediction layer based on extracted features
         self.prediction = nn.Linear(64, 10)
         self.loss = nn.LogSoftmax(1)
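Because fc1 is a 4x4 convolution applied to the 4x4x64 feature map coming out of the convolutional stack, it collapses the spatial dimensions to 1x1, leaving 64 features for the prediction layer. A small shape sketch; the flatten between fc1 and nn.Linear is an assumption about forward(), which this diff does not show:

import torch
import torch.nn as nn

fc1 = nn.Conv2d(64, 64, 4)        # "fully connected" via a 4x4 conv over a 4x4 map
relu4 = nn.ReLU()
prediction = nn.Linear(64, 10)    # one score per CIFAR-10-style class
log_softmax = nn.LogSoftmax(dim=1)

features = torch.randn(8, 64, 4, 4)     # assumed output of the conv/pool stack
x = relu4(fc1(features))                # -> (8, 64, 1, 1)
x = x.view(x.size(0), -1)               # flatten to (8, 64); assumed, forward() not shown
scores = log_softmax(prediction(x))     # -> (8, 10) log-probabilities
print(scores.shape)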
...