Skip to content
Snippets Groups Projects
Commit 9cff78f9 authored by dansa828
Browse files

update

parent 5ef16c69
Branches
Tags
No related merge requests found
......@@ -32,7 +32,6 @@
// Use these for setting shared memory size.
#define maxKernelSizeX 10
#define maxKernelSizeY 10
#define tileSize 8
// unsigned char median_kernel(skepu::Region2D<unsigned char> image, size_t elemPerPx)
/*__global__ void median_filter(unsigned char *image, unsigned char *out, const unsigned int imagesizex, const unsigned int imagesizey, const int kernelsizex, const int kernelsizey)
......@@ -118,17 +117,9 @@
}*/
__global__ void gaussian_filter(unsigned char *image, unsigned char *out, const unsigned int imagesizex, const unsigned int imagesizey, const int kernelsizex, const int kernelsizey, unsigned int *stencil)
{
stencil[0] = 1;
stencil[1] = 4;
stencil[2] = 6;
stencil[3] = 4;
stencil[4] = 1;
//__shared__ unsigned char patch[(32*3+(2*maxKernelSizeX*3))*(32+2*maxKernelSizeY)];
const int block_w = 32*3+(2*maxKernelSizeX);
const int block_h = 32+(2*maxKernelSizeY);
const int tile = 32 - 2*maxKernelSizeX;
__shared__ unsigned char patch[block_w*block_h];
const int tile = blockDim.x - 2*maxKernelSizeX;
__shared__ unsigned char patch[32*3*32];
// map from blockIdx to pixel position
int g_x = blockIdx.x * tile + threadIdx.x-kernelsizex;
int g_y = blockIdx.y * tile + threadIdx.y-kernelsizey;
......@@ -156,32 +147,30 @@ __global__ void gaussian_filter(unsigned char *image, unsigned char *out, const
unsigned int sumx, sumy, sumz;
int dy, dx;
sumx=0;sumy=0;sumz=0;
int j = 0;
for(dy=-kernelsizey;dy<=kernelsizey;dy++)
{
for(dx=-kernelsizex;dx<=kernelsizex;dx++)
{
sumx += patch[(l_i+(dy*blockDim.x)+dx)*3+0]*stencil[dy+kernelsizey];
sumy += patch[(l_i+(dy*blockDim.x)+dx)*3+1]*stencil[dy+kernelsizey];
sumz += patch[(l_i+(dy*blockDim.x)+dx)*3+2]*stencil[dy+kernelsizey];
sumx += patch[(l_i+(dy*blockDim.x)+dx)*3+0]*stencil[j];
sumy += patch[(l_i+(dy*blockDim.x)+dx)*3+1]*stencil[j];
sumz += patch[(l_i+(dy*blockDim.x)+dx)*3+2]*stencil[j];
j++;
}
}
int divby = (2*kernelsizex+1)*(2*kernelsizey+1); // Works for box filters only!
out[g_i*3+0] = sumx/divby;
out[g_i*3+1] = sumy/divby;
out[g_i*3+2] = sumz/divby;
out[g_i*3+0] = sumx/16;
out[g_i*3+1] = sumy/16;
out[g_i*3+2] = sumz/16;
}
}
__global__ void median_filter(unsigned char *image, unsigned char *out, const unsigned int imagesizex, const unsigned int imagesizey, const int kernelsizex, const int kernelsizey)
{
const int block_w = 32*3+(2*maxKernelSizeX);
const int block_h = 32+(2*maxKernelSizeY);
const int tile = 32 - 2*maxKernelSizeX;
__shared__ unsigned char patch[block_w*block_h];
const int tile = blockDim.x - 2*maxKernelSizeX;
__shared__ unsigned char patch[32*3*32];
// map from blockIdx to pixel position
int g_x = blockIdx.x * tile + threadIdx.x-kernelsizex;
int g_y = blockIdx.y * tile + threadIdx.y-kernelsizey;
......@@ -257,11 +246,8 @@ __global__ void gaussian_filter(unsigned char *image, unsigned char *out, const
}
__global__ void filter(unsigned char *image, unsigned char *out, const unsigned int imagesizex, const unsigned int imagesizey, const int kernelsizex, const int kernelsizey)
{
//__shared__ unsigned char patch[(32*3+(2*maxKernelSizeX*3))*(32+2*maxKernelSizeY)];
const int block_w = 32*3+(2*maxKernelSizeX);
const int block_h = 32+(2*maxKernelSizeY);
const int tile = 32 - 2*maxKernelSizeX;
__shared__ unsigned char patch[block_w*block_h];
const int tile = blockDim.x - 2*maxKernelSizeX;
__shared__ unsigned char patch[32*3*32];
// map from blockIdx to pixel position
int g_x = blockIdx.x * tile + threadIdx.x-kernelsizex;
int g_y = blockIdx.y * tile + threadIdx.y-kernelsizey;
......@@ -326,7 +312,8 @@ __global__ void filter(unsigned char *image, unsigned char *out, const unsigned
unsigned char *image, *pixels, *dev_bitmap, *dev_input;
unsigned int imagesizey, imagesizex; // Image size
unsigned int *stencil;
unsigned int stencil[5] = {1,4,6,4,1};
unsigned int *stencilcuda;
////////////////////////////////////////////////////////////////////////////////
// main computation function
......@@ -351,26 +338,20 @@ void computeImages(int kernelsizex, int kernelsizey)
//dim3 grid((imagesizex + blockSize.x - 1)/blockSize.x, (imagesizey + blockSize.y - 1)/blockSize.y);
//Task 1
//filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
//Task 2
//filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
//cudaDeviceSynchronize();
//filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizey, kernelsizex);
/*
// Gaussian
filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
for(int i = 0; i < 5; i++) {
filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
} */
//Gaussian
cudaMalloc( (void**)&stencil, (kernelsizex*2)+1);
gaussian_filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey, stencil);
gaussian_filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizey, kernelsizex, stencil);
//Median
//Task 3 Gaussian
/*cudaMalloc( (void**)&stencilcuda, 5*sizeof(unsigned int));
cudaMemcpy( stencilcuda, stencil, 5*sizeof(unsigned int), cudaMemcpyHostToDevice);
gaussian_filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey, stencilcuda);
cudaDeviceSynchronize();
gaussian_filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizey, kernelsizex, stencilcuda);*/
//Task 4 Median
//median_filter<<<grid,blockSize>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, kernelsizey);
cudaThreadSynchronize();
// Check for errors!
......@@ -380,8 +361,7 @@ void computeImages(int kernelsizex, int kernelsizey)
cudaMemcpy( pixels, dev_bitmap, imagesizey*imagesizex*3, cudaMemcpyDeviceToHost );
cudaFree( dev_bitmap );
cudaFree( dev_input );
cudaFree(stencil);
cudaFree(stencilcuda);
}
// Display images
......@@ -429,7 +409,7 @@ int main( int argc, char** argv)
ResetMilli();
//Task 1,4
//computeImages(2, 2);
//computeImages(7, 7);
//Task 2
computeImages(2, 1);
computeImages(1, 2);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment