Skip to content
Snippets Groups Projects
Commit 4d95f649 authored by matbe320
Browse files

Lab5: Stuck on fetching... again...

parent f3ee549d
No related branches found
No related tags found
No related merge requests found
...@@ -101,7 +101,7 @@ __global__ void filter_separable(unsigned char *image, unsigned char *out, const ...@@ -101,7 +101,7 @@ __global__ void filter_separable(unsigned char *image, unsigned char *out, const
{ {
__shared__ unsigned char local_memory [STATIC_SHARED_MEM_SIZE_SEP * 3]; __shared__ unsigned char local_memory [STATIC_SHARED_MEM_SIZE_SEP * 3];
// map from blockIdx to pixel position // map from blockIdx to pixel position
unsigned threadsPerBlock = blockDim.x * blockDim.y; unsigned threadsPerBlock = blockDim.x;
int blockOffsetX = blockIdx.x * blockDim.x; int blockOffsetX = blockIdx.x * blockDim.x;
int blockOffsetY = blockIdx.y * blockDim.y; int blockOffsetY = blockIdx.y * blockDim.y;
...@@ -109,10 +109,12 @@ __global__ void filter_separable(unsigned char *image, unsigned char *out, const ...@@ -109,10 +109,12 @@ __global__ void filter_separable(unsigned char *image, unsigned char *out, const
int local_index = threadIdx.x; int local_index = threadIdx.x;
int FILTER_SIZE = (kernelsize * 2 + 1) + (2 * kernelsize); int FILTER_SIZE = (kernelsize * 2 + 1) + (2 * kernelsize);
while(local_index < FILTER_SIZE) while(local_index < FILTER_SIZE)
{ {
int local_corner_px = blockOffsetY * imagesizex + blockOffsetX; int global_x = max(0, blockOffsetX - kernelsize + local_index);
int global_index = max(0, local_corner_px - kernelsize * stride + local_index); int global_y = max(0, blockOffsetY);
unsigned global_index = global_y * imagesizex + global_x;
local_memory[3*local_index+0] = image[3*global_index+0]; local_memory[3*local_index+0] = image[3*global_index+0];
local_memory[3*local_index+1] = image[3*global_index+1]; local_memory[3*local_index+1] = image[3*global_index+1];
...@@ -214,13 +216,14 @@ void computeImages(int kernelsizex, int kernelsizey) ...@@ -214,13 +216,14 @@ void computeImages(int kernelsizex, int kernelsizey)
dim3 gridsize_sep(imagesizex/(kernelsizex*2+1), imagesizey); dim3 gridsize_sep(imagesizex/(kernelsizex*2+1), imagesizey);
dim3 blocksize_sep(kernelsizex*2+1); dim3 blocksize_sep(kernelsizex*2+1);
unsigned char *tmp = dev_input; // unsigned char *tmp = dev_input;
ResetMilli(); ResetMilli();
/* X-direction */ /* X-direction */
filter_separable<<<gridsize_sep, blocksize_sep>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, imagesizex); filter_separable<<<gridsize_sep, blocksize_sep>>>(dev_input, dev_bitmap, imagesizex, imagesizey, kernelsizex, 1);
cudaThreadSynchronize(); cudaThreadSynchronize();
/* Y-direction */ /* Y-direction */
// filter_separable<<<gridsize_sep, blocksize_sep>>>(tmp, dev_bitmap, imagesizex, imagesizey, kernelsizex, imagesizex); // dim3 gridsize_sep(imagesizex, imagesizey/(kernelsizex*2+1));
// filter_separable<<<gridsize_sep, blocksize_sep>>>(tmp, dev_bitmap, imagesizex, imagesizey, kernelsizex, 1);
cudaThreadSynchronize(); cudaThreadSynchronize();
printf("[Separable] %f\n", GetSeconds()); printf("[Separable] %f\n", GetSeconds());
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment