diff --git a/Lab5/filter.cu b/Lab5/filter.cu
index cdc962b24660195ccf620ec43ed350cede11c748..315145de762fb4a221c2e2be4d37836be2561024 100755
--- a/Lab5/filter.cu
+++ b/Lab5/filter.cu
@@ -31,19 +31,15 @@
 #include "milli.h"
 
 // Use these for setting shared memory size.
-#define RADIUS 12
-#define BLOCK_H 32
-#define BLOCK_W 32
-#define FILTER_W (RADIUS*2+BLOCK_W) // 3 pixels each
-#define FILTER_H (RADIUS*2+BLOCK_H)
-#define maxKernelSizeX BLOCK_W
-#define maxKernelSizeY BLOCK_H
-
-// Number of pixels in local memory
-#define LOCAL_MEM_SIZE_PX (FILTER_W * FILTER_H)
+#define RADIUS 3
+
+#define maxKernelSizeX (RADIUS*2+1)
+#define maxKernelSizeY (RADIUS*2+1)
 
-// #define BLOCK_SIZE 16 // total number of threads per block = 256
-// #define GRID_SIZE 8  // total number of blocks per grid = 256
+#define FILTER_W (maxKernelSizeX+RADIUS*2)
+#define FILTER_H (maxKernelSizeY+RADIUS*2)
+
+#define LOCAL_MEM_SIZE_PX (FILTER_W * FILTER_H)
 
 __global__ void filter(unsigned char *image, unsigned char *out, const unsigned int imagesizex, const unsigned int imagesizey, const int kernelsizex, const int kernelsizey)
 {
@@ -54,8 +50,7 @@ __global__ void filter(unsigned char *image, unsigned char *out, const unsigned
   unsigned threadNumInBlock = threadIdx.x + blockDim.x *threadIdx.y;
   unsigned threadsPerBlock  = blockDim.x * blockDim.y;
 
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int y = blockIdx.y * blockDim.y + threadIdx.y;
+
 
   int blockOffsetX = blockIdx.x * blockDim.x;
   int blockOffsetY = blockIdx.y * blockDim.y;
@@ -75,65 +70,43 @@ __global__ void filter(unsigned char *image, unsigned char *out, const unsigned
     local_index += threadsPerBlock;
   }
 
-    int dy, dx;
+  int dy, dx;
   unsigned int sumx, sumy, sumz;
   /* Old factor */
-//   int divby = (2*kernelsizex*3+1)*(2*kernelsizey+1); // Works for box filters only!
-// 	if (x < imagesizex && y < imagesizey) // If inside image
-// 	{
-// // Filter kernel (simple box filter)
-// 	sumx=0;sumy=0;sumz=0;
-// 	for(dy=-kernelsizey;dy<=kernelsizey;dy++)
-// 		for(dx=-kernelsizex;dx<=kernelsizex;dx++)
-// 		{
-// 			// Use max and min to avoid branching!
-// 			int yy = min(max(y+dy, 0), BLOCK_H - 1);
-// 			int xx = min(max(x+dx, 0), BLOCK_W - 1);
-//
-//       // printf("This is the index value: %i for thread %i\n", ((yy)*imagesizex+(xx))*3+0, threadid);
-//       sumx += local_memory[((yy)*(BLOCK_W+RADIUS)+(xx))*3+0];
-//       sumy += local_memory[((yy)*(BLOCK_W+RADIUS)+(xx))*3+1];
-//       sumz += local_memory[((yy)*(BLOCK_W+RADIUS)+(xx))*3+2];
-//       // printf("This is the sumx: %i sumy: %i sumz: %i\n", sumx, sumy, sumz);
-// 		}
+  // Box-filter normaliser: the loops below add one sample per pixel per channel, so no factor 3 belongs here.
+  int divby = (2*kernelsizex+1)*(2*kernelsizey+1); // Works for box filters only!
+  // Pixel offset that skips RADIUS rows plus RADIUS columns of a FILTER_W-wide tile.
+  int skip_pixels =  FILTER_W * RADIUS + RADIUS;
+
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int y = blockIdx.y * blockDim.y + threadIdx.y;
+  int local_x = threadIdx.x;
+  int local_y = threadIdx.y;
+  int local_mem_address = skip_pixels + (local_y * FILTER_W)  + local_x; // only used by the debug copy below
+
+  // The load loop above strides by threadsPerBlock, so each thread fills only part of local_memory;
+  // every thread must finish before any thread reads the pixels its neighbours loaded.
+  __syncthreads();
+  // NOTE(review): the (x < imagesizex && y < imagesizey) guard is disabled - assumes the grid tiles the image exactly; confirm at the launch site.
+ // Filter kernel (simple box filter)
+ 	sumx=0;sumy=0;sumz=0;
+ 	for(dy=-kernelsizey;dy<=kernelsizey;dy++)
+ 		for(dx=-kernelsizex;dx<=kernelsizex;dx++)
+ 		{
+       int mem_access = skip_pixels + (local_y + dy) * FILTER_W + (local_x + dx); // pixel index; stays inside the halo only if kernelsize <= RADIUS
+       sumx += local_memory[mem_access*3+0];
+       sumy += local_memory[mem_access*3+1];
+       sumz += local_memory[mem_access*3+2];
+ 		}
 
   __syncthreads();
+  // out[3*(y*imagesizex+x)+0] = local_memory[3*local_mem_address+0];
+  // out[3*(y*imagesizex+x)+1] = local_memory[3*local_mem_address+1];
+  // out[3*(y*imagesizex+x)+2] = local_memory[3*local_mem_address+2];
 
-  for(dy=-kernelsizey;dy<=kernelsizey;dy++)
-		for(dx=-kernelsizex;dx<=kernelsizex;dx++)
-    {
-      int yy = min(max(y+dy, 0), BLOCK_H - 1);
-      int xx = min(max(x+dx, 0), BLOCK_W - 1);
-
-      // out[y*imagesizex+(x*3+0)] = local_memory[yy*(BLOCK_W+RADIUS)+(xx*3+0)];
-      // out[y*imagesizex+(x*3+1)] = local_memory[yy*(BLOCK_W+RADIUS)+(xx*3+1)];
-      // out[y*imagesizex+(x*3+2)] = local_memory[yy*(BLOCK_W+RADIUS)+(xx*3+2)];
-    }
-
-    int local_x = threadIdx.x;
-    int local_y = threadIdx.y;
-   //  if((local_x + local_y) == 0)
-   //    printf("This is the adress we're trying to reach in local memory: %i\n", FILTER_W*RADIUS+local_y*FILTER_W+3*(local_x+RADIUS)+0);
-   /* Escape top + escape Y + escape start x */
-   int local_mem_address = FILTER_W * RADIUS + (local_y * FILTER_W) + RADIUS + local_x;
-      // int local_mem_address = (BLOCK_W+RADIUS*2)*RADIUS*3 + (local_y*2*RADIUS+RADIUS) + (3*(local_y*BLOCK_W + RADIUS * 2) + local_x);
-  if((blockIdx.x + blockIdx.y == 0) && (threadIdx.y == 5) && (threadIdx.x == 5))
-    printf("In Outprint:\nWe are thread %i. Local mem [%i]\n", threadIdx.y * BLOCK_H + threadIdx.x,local_mem_address);
-   out[3*(y*imagesizex+x)+0] = local_memory[3*local_mem_address+0];
-   out[3*(y*imagesizex+x)+1] = local_memory[3*local_mem_address+1];
-   out[3*(y*imagesizex+x)+2] = local_memory[3*local_mem_address+2];
-
-   // printf("Global id: %i Local Mem: %i Size of shared memory is: %i FILTER_W: %i FILTER_H: %i\n", threadid, local_mem_address, FILTER_W*FILTER_H, FILTER_W,FILTER_H);
-    // out[3*(y*imagesizex+x)+0] = image[3*(y*imagesizex+x)+0];
-    // out[3*(y*imagesizex+x)+1] = image[3*(y*imagesizex+x)+1];
-    // out[3*(y*imagesizex+x)+2] = image[3*(y*imagesizex+x)+2];
-
-  // printf("Filter_W: %i Filter_H: %i ", )
-/*
   out[(y*imagesizex+x)*3+0] = sumx/divby;
 	out[(y*imagesizex+x)*3+1] = sumy/divby;
 	out[(y*imagesizex+x)*3+2] = sumz/divby;
-*/
+
 
 }
 
@@ -240,7 +213,7 @@ int main( int argc, char** argv)
 
 	ResetMilli();
 
-	computeImages(2, 2);
+	computeImages(RADIUS, RADIUS);
 // You can save the result to a file like this:
 	writeppm("out.ppm", imagesizey, imagesizex, pixels);