progress

36952f9d · dansa828 · dae5dd98 · 36952f9d · 36952f9d · 36952f9d
Commit 36952f9d authored 4 years ago by dansa828
--- a/Lab3/average.cpp
+++ b/Lab3/average.cpp
@@ -28,8 +28,11 @@ unsigned char average_kernel(skepu::Region2D<unsigned char> m, size_t elemPerPx)
 unsigned char average_kernel_1d(skepu::Region1D<unsigned char> m, size_t elemPerPx)
 {
-	// your code here
+	float scaling = 1.0 / (m.oi*2+1);
-	return m(0);
+	float res = 0;
+	for (int y = -m.oi; y <= m.oi; ++y)
+			res += m(y);
+	return res * scaling;
 }
@@ -50,19 +53,19 @@ int main(int argc, char* argv[])
 		std::cout << "Usage: " << argv[0] << " input output radius [backend]\n";
 		exit(1);
 	}
 	LodePNGColorType colorType = LCT_RGB;
 	std::string inputFileName = argv[1];
 	std::string outputFileName = argv[2];
 	const int radius = atoi(argv[3]);
 	auto spec = skepu::BackendSpec{argv[4]};
 	skepu::setGlobalBackendSpec(spec);
 	// Create the full path for writing the image.
 	std::stringstream ss;
 	ss << (2 * radius + 1) << "x" << (2 * radius + 1);
 	std::string outputFile = outputFileName + ss.str();
 	// Read the padded image into a matrix. Create the output matrix without padding.
 	// Padded version for 2D MapOverlap, non-padded for 1D MapOverlap
 	ImageInfo imageInfo;
@@ -70,56 +73,63 @@ int main(int argc, char* argv[])
 	skepu::Matrix<unsigned char> inputMatrix = ReadPngFileToMatrix(inputFileName, colorType, imageInfo);
 	skepu::Matrix<unsigned char> outputMatrix(imageInfo.height, imageInfo.width * imageInfo.elementsPerPixel, 120);
 	// more containers...?
 	// Original version
 	{
 		auto conv = skepu::MapOverlap(average_kernel);
 		conv.setOverlap(radius, radius  * imageInfo.elementsPerPixel);
 		auto timeTaken = skepu::benchmark::measureExecTime([&]
 		{
 			conv(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel);
 		});
 		WritePngFileMatrix(outputMatrix, outputFile + "-average.png", colorType, imageInfo);
 		std::cout << "Time for combined: " << (timeTaken.count() / 10E6) << "\n";
 	}
 	// Separable version
 	// use conv.setOverlapMode(skepu::Overlap::[ColWise RowWise]);
 	// and conv.setOverlap(<integer>)
 	{
-		auto conv = skepu::MapOverlap(average_kernel_1d);
+		auto conv1 = skepu::MapOverlap(average_kernel_1d);
+		//auto conv2 = skepu::MapOverlap(average_kernel_1d);
+		conv1.setOverlapMode(skepu::Overlap::ColWise);
+		conv1.setOverlap(radius*imageInfo.elementsPerPixel);
+		//conv2.setOverlapMode(skepu::Overlap::RowWise);
+		//conv2.setOverlap(radius*imageInfo.elementsPerPixel);
 		auto timeTaken = skepu::benchmark::measureExecTime([&]
 		{
 			// your code here
+			conv1(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel);
+			//conv2(outputMatrix, inputMatrixPad, imageInfo.elementsPerPixel);
 		});
-	//	WritePngFileMatrix(outputMatrix, outputFile + "-separable.png", colorType, imageInfo);
+		WritePngFileMatrix(outputMatrix, outputFile + "-separable.png", colorType, imageInfo);
 		std::cout << "Time for separable: " << (timeTaken.count() / 10E6) << "\n";
 	}
 	// Separable gaussian
 	{
 		skepu::Vector<float> stencil = sampleGaussian(radius);
 		// skeleton instance, etc here (remember to set backend)
 		auto timeTaken = skepu::benchmark::measureExecTime([&]
 		{
 			// your code here
 		});
 	//	WritePngFileMatrix(outputMatrix, outputFile + "-gaussian.png", colorType, imageInfo);
 		std::cout << "Time for gaussian: " << (timeTaken.count() / 10E6) << "\n";
 	}
-	return 0;
-}
+	return 0;
+}
--- a/Lab3/dotproduct.cpp
+++ b/Lab3/dotproduct.cpp
@@ -20,7 +20,15 @@ float userfunction(...)
 // more user functions...
 */
+float multiply(float a, float b)
+{
+	return a*b;
+}
+float add(float a, float b)
+{
+	return a+b;
+}
 int main(int argc, const char* argv[])
 {
@@ -29,41 +37,46 @@ int main(int argc, const char* argv[])
 		std::cout << "Usage: " << argv[0] << " <input size> <backend>\n";
 		exit(1);
 	}
 	const size_t size = std::stoul(argv[1]);
 	auto spec = skepu::BackendSpec{argv[2]};
 //	spec.setCPUThreads(<integer value>);
 	skepu::setGlobalBackendSpec(spec);
 	/* Skeleton instances */
 //	auto instance = skepu::Map(userfunction);
-// ...
+	auto dotprodMap = skepu::MapReduce<2>(multiply, add);
+	auto multMap = skepu::Map<2>(multiply);
+	auto addReduce = skepu::Reduce(add);
 	/* SkePU containers */
-	skepu::Vector<float> v1(size, 1.0f), v2(size, 2.0f);
+	skepu::Vector<float> v1(size, 1.0f), v2(size, 2.0f), v3(size, 1.0f);
 	/* Compute and measure time */
 	float resComb, resSep;
 	auto timeComb = skepu::benchmark::measureExecTime([&]
 	{
-		// your code here
+		resComb = dotprodMap(v1, v2);
 	});
 	auto timeSep = skepu::benchmark::measureExecTime([&]
 	{
-		// your code here
+		multMap(v3, v1, v2);
+		resSep = addReduce(v3);
 	});
 	std::cout << "Time Combined: " << (timeComb.count() / 10E6) << " seconds.\n";
 	std::cout << "Time Separate: " << ( timeSep.count() / 10E6) << " seconds.\n";
 	std::cout << "Result Combined: " << resComb << "\n";
 	std::cout << "Result Separate: " << resSep  << "\n";
 	return 0;
 }
--- a/README.md
+++ b/README.md
@@ -33,9 +33,10 @@ If you need to use the vector that the Map returns to anything else in the progr
 #### Question 1.3: Is there a SkePU backend which is always more efficient to use, or does this depend on the problem size?  Why?  Either show with measurements or provide a valid reasoning.
 CPU: Small problem sizes will be faster on the CPU because its clock frequency is higher than the GPU's.
-GPU: Big problem sizes will be faster because there are many more cores in the GPU.
+GPU: Big problem sizes will be faster because there are many more cores in the GPU. The problem must be large and parallelizable to make use of the GPU, because it takes time to transfer the data from the CPU to the GPU.
 #### Question 1.4: Try measuring the parallel back-ends with measureExecTime exchanged for measureExecTimeIdempotent. This measurement does a "cold run" of the lambda expression before running the proper measurement.  Do you see a difference for some backends, and if so, why?
+Yes, especially for OpenCL, where the bottleneck is transferring the data from the CPU to the GPU.
 #### Question 2.1: Which version of the averaging filter (unified, separable) is the most efficient? Why?