Skip to content
Snippets Groups Projects
Commit 36952f9d authored by dansa828's avatar dansa828
Browse files

progress

parent dae5dd98
Branches
No related tags found
No related merge requests found
...@@ -28,8 +28,11 @@ unsigned char average_kernel(skepu::Region2D<unsigned char> m, size_t elemPerPx) ...@@ -28,8 +28,11 @@ unsigned char average_kernel(skepu::Region2D<unsigned char> m, size_t elemPerPx)
unsigned char average_kernel_1d(skepu::Region1D<unsigned char> m, size_t elemPerPx) unsigned char average_kernel_1d(skepu::Region1D<unsigned char> m, size_t elemPerPx)
{ {
// your code here float scaling = 1.0 / (m.oi*2+1);
return m(0); float res = 0;
for (int y = -m.oi; y <= m.oi; ++y)
res += m(y);
return res * scaling;
} }
...@@ -50,19 +53,19 @@ int main(int argc, char* argv[]) ...@@ -50,19 +53,19 @@ int main(int argc, char* argv[])
std::cout << "Usage: " << argv[0] << " input output radius [backend]\n"; std::cout << "Usage: " << argv[0] << " input output radius [backend]\n";
exit(1); exit(1);
} }
LodePNGColorType colorType = LCT_RGB; LodePNGColorType colorType = LCT_RGB;
std::string inputFileName = argv[1]; std::string inputFileName = argv[1];
std::string outputFileName = argv[2]; std::string outputFileName = argv[2];
const int radius = atoi(argv[3]); const int radius = atoi(argv[3]);
auto spec = skepu::BackendSpec{argv[4]}; auto spec = skepu::BackendSpec{argv[4]};
skepu::setGlobalBackendSpec(spec); skepu::setGlobalBackendSpec(spec);
// Create the full path for writing the image. // Create the full path for writing the image.
std::stringstream ss; std::stringstream ss;
ss << (2 * radius + 1) << "x" << (2 * radius + 1); ss << (2 * radius + 1) << "x" << (2 * radius + 1);
std::string outputFile = outputFileName + ss.str(); std::string outputFile = outputFileName + ss.str();
// Read the padded image into a matrix. Create the output matrix without padding. // Read the padded image into a matrix. Create the output matrix without padding.
// Padded version for 2D MapOverlap, non-padded for 1D MapOverlap // Padded version for 2D MapOverlap, non-padded for 1D MapOverlap
ImageInfo imageInfo; ImageInfo imageInfo;
...@@ -70,56 +73,63 @@ int main(int argc, char* argv[]) ...@@ -70,56 +73,63 @@ int main(int argc, char* argv[])
skepu::Matrix<unsigned char> inputMatrix = ReadPngFileToMatrix(inputFileName, colorType, imageInfo); skepu::Matrix<unsigned char> inputMatrix = ReadPngFileToMatrix(inputFileName, colorType, imageInfo);
skepu::Matrix<unsigned char> outputMatrix(imageInfo.height, imageInfo.width * imageInfo.elementsPerPixel, 120); skepu::Matrix<unsigned char> outputMatrix(imageInfo.height, imageInfo.width * imageInfo.elementsPerPixel, 120);
// more containers...? // more containers...?
// Original version // Original version
{ {
auto conv = skepu::MapOverlap(average_kernel); auto conv = skepu::MapOverlap(average_kernel);
conv.setOverlap(radius, radius * imageInfo.elementsPerPixel); conv.setOverlap(radius, radius * imageInfo.elementsPerPixel);
auto timeTaken = skepu::benchmark::measureExecTime([&] auto timeTaken = skepu::benchmark::measureExecTime([&]
{ {
conv(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel); conv(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel);
}); });
WritePngFileMatrix(outputMatrix, outputFile + "-average.png", colorType, imageInfo); WritePngFileMatrix(outputMatrix, outputFile + "-average.png", colorType, imageInfo);
std::cout << "Time for combined: " << (timeTaken.count() / 10E6) << "\n"; std::cout << "Time for combined: " << (timeTaken.count() / 10E6) << "\n";
} }
// Separable version // Separable version
// use conv.setOverlapMode(skepu::Overlap::[ColWise RowWise]); // use conv.setOverlapMode(skepu::Overlap::[ColWise RowWise]);
// and conv.setOverlap(<integer>) // and conv.setOverlap(<integer>)
{ {
auto conv = skepu::MapOverlap(average_kernel_1d); auto conv1 = skepu::MapOverlap(average_kernel_1d);
//auto conv2 = skepu::MapOverlap(average_kernel_1d);
conv1.setOverlapMode(skepu::Overlap::ColWise);
conv1.setOverlap(radius*imageInfo.elementsPerPixel);
//conv2.setOverlapMode(skepu::Overlap::RowWise);
//conv2.setOverlap(radius*imageInfo.elementsPerPixel);
auto timeTaken = skepu::benchmark::measureExecTime([&] auto timeTaken = skepu::benchmark::measureExecTime([&]
{ {
// your code here // your code here
conv1(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel);
//conv2(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel);
}); });
// WritePngFileMatrix(outputMatrix, outputFile + "-separable.png", colorType, imageInfo); WritePngFileMatrix(outputMatrix, outputFile + "-separable.png", colorType, imageInfo);
std::cout << "Time for separable: " << (timeTaken.count() / 10E6) << "\n"; std::cout << "Time for separable: " << (timeTaken.count() / 10E6) << "\n";
} }
// Separable gaussian // Separable gaussian
{ {
skepu::Vector<float> stencil = sampleGaussian(radius); skepu::Vector<float> stencil = sampleGaussian(radius);
// skeleton instance, etc here (remember to set backend) // skeleton instance, etc here (remember to set backend)
auto timeTaken = skepu::benchmark::measureExecTime([&] auto timeTaken = skepu::benchmark::measureExecTime([&]
{ {
// your code here // your code here
}); });
// WritePngFileMatrix(outputMatrix, outputFile + "-gaussian.png", colorType, imageInfo); // WritePngFileMatrix(outputMatrix, outputFile + "-gaussian.png", colorType, imageInfo);
std::cout << "Time for gaussian: " << (timeTaken.count() / 10E6) << "\n"; std::cout << "Time for gaussian: " << (timeTaken.count() / 10E6) << "\n";
} }
return 0;
}
return 0;
}
...@@ -20,7 +20,15 @@ float userfunction(...) ...@@ -20,7 +20,15 @@ float userfunction(...)
// more user functions... // more user functions...
*/ */
// User function: product of two floats. Passed to the Map and MapReduce
// skeletons as the element-wise stage of the dot product.
float multiply(float a, float b)
{
	const float product = a * b;
	return product;
}
// User function: sum of two floats. Passed to the Reduce and MapReduce
// skeletons as the combining stage of the dot product.
float add(float a, float b)
{
	const float sum = a + b;
	return sum;
}
int main(int argc, const char* argv[]) int main(int argc, const char* argv[])
{ {
...@@ -29,41 +37,46 @@ int main(int argc, const char* argv[]) ...@@ -29,41 +37,46 @@ int main(int argc, const char* argv[])
std::cout << "Usage: " << argv[0] << " <input size> <backend>\n"; std::cout << "Usage: " << argv[0] << " <input size> <backend>\n";
exit(1); exit(1);
} }
const size_t size = std::stoul(argv[1]); const size_t size = std::stoul(argv[1]);
auto spec = skepu::BackendSpec{argv[2]}; auto spec = skepu::BackendSpec{argv[2]};
// spec.setCPUThreads(<integer value>); // spec.setCPUThreads(<integer value>);
skepu::setGlobalBackendSpec(spec); skepu::setGlobalBackendSpec(spec);
/* Skeleton instances */ /* Skeleton instances */
// auto instance = skepu::Map(userfunction); // auto instance = skepu::Map(userfunction);
// ... auto dotprodMap = skepu::MapReduce<2>(multiply, add);
auto multMap = skepu::Map<2>(multiply);
auto addReduce = skepu::Reduce(add);
/* SkePU containers */ /* SkePU containers */
skepu::Vector<float> v1(size, 1.0f), v2(size, 2.0f); skepu::Vector<float> v1(size, 1.0f), v2(size, 2.0f), v3(size, 1.0f);
/* Compute and measure time */ /* Compute and measure time */
float resComb, resSep; float resComb, resSep;
auto timeComb = skepu::benchmark::measureExecTime([&] auto timeComb = skepu::benchmark::measureExecTime([&]
{ {
// your code here resComb = dotprodMap(v1, v2);
}); });
auto timeSep = skepu::benchmark::measureExecTime([&] auto timeSep = skepu::benchmark::measureExecTime([&]
{ {
// your code here multMap(v3, v1, v2);
resSep = addReduce(v3);
}); });
std::cout << "Time Combined: " << (timeComb.count() / 10E6) << " seconds.\n"; std::cout << "Time Combined: " << (timeComb.count() / 10E6) << " seconds.\n";
std::cout << "Time Separate: " << ( timeSep.count() / 10E6) << " seconds.\n"; std::cout << "Time Separate: " << ( timeSep.count() / 10E6) << " seconds.\n";
std::cout << "Result Combined: " << resComb << "\n"; std::cout << "Result Combined: " << resComb << "\n";
std::cout << "Result Separate: " << resSep << "\n"; std::cout << "Result Separate: " << resSep << "\n";
return 0; return 0;
} }
...@@ -33,9 +33,10 @@ If you need to use the vector that the Map returns to anything else in the progr ...@@ -33,9 +33,10 @@ If you need to use the vector that the Map returns to anything else in the progr
#### Question 1.3: Is there a SkePU backend which is always more efficient to use, or does this depend on the problem size? Why? Either show with measurements or provide a valid reasoning. #### Question 1.3: Is there a SkePU backend which is always more efficient to use, or does this depend on the problem size? Why? Either show with measurements or provide a valid reasoning.
CPU: Small problem sizes will be faster because the clock frequency of the CPU is higher than that of the GPU. CPU: Small problem sizes will be faster because the clock frequency of the CPU is higher than that of the GPU.
GPU: Big problem sizes will be faster because there are many more cores in the GPU. GPU: Big problem sizes will be faster because there are many more cores in the GPU. The problem must be large and parallelizable to make use of the GPU, because it takes time to send the data from the CPU to the GPU.
#### Question 1.4: Try measuring the parallel back-ends with measureExecTime exchanged for measureExecTimeIdempotent. This measurement does a "cold run" of the lambda expression before running the proper measurement. Do you see a difference for some backends, and if so, why? #### Question 1.4: Try measuring the parallel back-ends with measureExecTime exchanged for measureExecTimeIdempotent. This measurement does a "cold run" of the lambda expression before running the proper measurement. Do you see a difference for some backends, and if so, why?
Especially for OpenCL, the bottleneck is loading the data from the CPU to the GPU.
#### Question 2.1: Which version of the averaging filter (unified, separable) is the most efficient? Why? #### Question 2.1: Which version of the averaging filter (unified, separable) is the most efficient? Why?
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment