From 4998c1ec76dd2bc198968fb4775819c32b7ad4f8 Mon Sep 17 00:00:00 2001
From: Xuan Gu <xuan.gu@liu.se>
Date: Wed, 25 Jan 2023 11:15:08 +0000
Subject: [PATCH] Upload New File

---
 scripts/benchmark_plot.m | 191 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 scripts/benchmark_plot.m

diff --git a/scripts/benchmark_plot.m b/scripts/benchmark_plot.m
new file mode 100644
index 0000000..63d992c
--- /dev/null
+++ b/scripts/benchmark_plot.m
@@ -0,0 +1,191 @@
+% Start from a clean session: close every open figure and wipe the workspace.
+close('all'); clear
+% Put both folder trees (with all their subfolders) on the MATLAB search path.
+addpath(genpath('/home/xuagu37/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet'));
+addpath(genpath('/home/xuagu37/OneDrive/ngc'));
+
+%% Load data
+% Gather throughput statistics for every (nodes, gpus, batch_size) configuration.
+% Every configuration is read from n_iterations TF32 and n_iterations AMP JSON files through
+% get_throughput, which is defined outside this script (presumably on the paths added above).
+% Result: benchmark_table, one row per configuration that survives the filter at the end.
+
+dim = 2;
+n_iterations = 100;
+batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128];
+results_root = '/home/xuagu37/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet/results_20221101/';
+
+% Preallocate one row per configuration instead of growing the matrix inside the loops.
+benchmark = zeros(8 * 8 * numel(batch_sizes), 14);
+row = 0;
+
+for nodes = 1 : 8
+    for gpus = 1 : 8
+        for batch_size = batch_sizes
+            prefix = sprintf('%sbenchmark_dim%d_nodes%d_gpus%d_batchsize%d', results_root, dim, nodes, gpus, batch_size);
+            throughput_tf32 = zeros(1, n_iterations);
+            throughput_amp = zeros(1, n_iterations);
+
+            for iteration = 1 : n_iterations
+                throughput_tf32(iteration) = get_throughput(sprintf('%s_tf32_iteration%d.json', prefix, iteration));
+                throughput_amp(iteration) = get_throughput(sprintf('%s_amp_iteration%d.json', prefix, iteration));
+            end
+
+            % Per precision: mean, min, max, std; the coefficient of variation (std/mean) follows.
+            stats_tf32 = [mean(throughput_tf32), min(throughput_tf32), max(throughput_tf32), std(throughput_tf32)];
+            stats_amp = [mean(throughput_amp), min(throughput_amp), max(throughput_amp), std(throughput_amp)];
+
+            row = row + 1;
+            benchmark(row, :) = [dim, nodes, gpus, batch_size, ...
+                stats_tf32, stats_tf32(4) / stats_tf32(1), ...
+                stats_amp, stats_amp(4) / stats_amp(1)];
+        end
+    end
+end
+
+col_names = {'dim', 'nodes', 'gpus', 'batch_size', ...
+            'throughput_tf32_mean', 'throughput_tf32_min', 'throughput_tf32_max', 'throughput_tf32_std', 'throughput_tf32_cv', ...
+            'throughput_amp_mean', 'throughput_amp_min', 'throughput_amp_max', 'throughput_amp_std', 'throughput_amp_cv'};
+benchmark_table = array2table(benchmark, 'VariableNames', col_names);
+
+% Drop configurations whose mean TF32 throughput is exactly 0.
+benchmark_table(benchmark_table.throughput_tf32_mean==0, :) = [];
+% Total GPU count = 8 for each preceding node + the GPUs used on this node. This is the only
+% conversion: also pre-setting gpus to nodes*8 would count multi-node rows twice (2 nodes -> 24).
+benchmark_table.gpus = (benchmark_table.nodes - 1)*8 + benchmark_table.gpus;
+
+
+%% Figure 1
+% Mean throughput against GPU count at batch size 128, TF32 and AMP, each paired with an
+% ideal line that scales the first measured value by the GPU count.
+
+gpus = [1 : 8, 16:8:64];
+batch_size = 128;
+
+close all
+figure('Position', [100 100 1200 600])
+
+tp_tf32 = benchmark_table.throughput_tf32_mean(benchmark_table.batch_size == batch_size);
+tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.batch_size == batch_size);
+
+% Draw order matches the legend entries further down.
+plot(gpus, tp_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
+hold on
+plot(gpus, tp_tf32(1)*gpus, '--*', 'LineWidth', 2)
+plot(gpus, tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+plot(gpus, tp_amp(1)*gpus, '--*', 'LineWidth', 2)
+
+% Ticks sit at the GPU counts; only the multiples of 8 carry a label (1 to 8).
+ax = gca;
+ax.XTick = gpus;
+ax.XTickLabel = {'', '', '', '', '', '', '',...
+                 '1', '2', '3', '4', '5', '6', '7', '8'};
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ax.YAxis.Exponent = 3;
+ax.YLim = [0 72000];
+ax.XLim = [1 64];
+
+legend({'Batch size 128, TF32', 'Batch size 128, TF32, ideal', 'Batch size 128, AMP', 'Batch size 128, AMP, ideal'}, 'FontSize', 12, 'Location', 'northwest')
+xlabel('Nodes', 'FontSize', 14)
+ylabel('Throughput (images/s)', 'FontSize', 14)
+% Export the axes as a PNG at resolution 300.
+exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_gpus_ideal.png','Resolution',300)
+
+
+%% Figure 2
+% Mean throughput against batch size for 1, 8 and 16 GPUs, TF32 and AMP.
+
+% NOTE(review): assumes the first 8 table rows hold one complete, increasing batch-size sweep.
+batch_size = benchmark_table.batch_size(1:8);
+
+close all
+figure('Position', [100 100 1200 600])
+
+% Curves are added in legend order: TF32 then AMP for each GPU count.
+for gpus = [1 8 16]
+
+    tp_tf32 = benchmark_table.throughput_tf32_mean(benchmark_table.gpus == gpus);
+    tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.gpus == gpus);
+
+    plot(log2(batch_size), tp_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
+    hold on
+    plot(log2(batch_size), tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+end
+
+ax = gca;
+% Pin the ticks to the plotted x positions so each label lands on its own batch size,
+% rather than on whichever ticks MATLAB would pick automatically.
+set (ax, 'XTick', log2(batch_size)', 'XTickLabel', num2cell(batch_size));
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ax.YAxis.Exponent = 3;
+ylim([0 14320])
+legend({'1 GPU, TF32', '1 GPU, AMP', '8 GPUs, TF32', '8 GPUs, AMP', '16 GPUs, TF32', '16 GPUs, AMP'}, 'FontSize', 12, 'Location', 'northwest')
+xlabel('Batch size', 'FontSize', 14)
+ylabel('Throughput (images/s)', 'FontSize', 14)
+exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_batch_size.png','Resolution',300)
+
+
+
+%% Figure 3
+% Coefficient of variation of the throughput against batch size for 1 and 8 GPUs, TF32 and AMP.
+
+% NOTE(review): assumes the first 8 table rows hold one complete, increasing batch-size sweep.
+batch_size = benchmark_table.batch_size(1:8);
+
+close all
+figure('Position', [100 100 1200 600])
+
+for gpus = [1 8]
+    cv_tf32 = benchmark_table.throughput_tf32_cv(benchmark_table.gpus == gpus);
+    cv_amp = benchmark_table.throughput_amp_cv(benchmark_table.gpus == gpus);
+
+    plot(log2(batch_size), cv_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
+    hold on
+    plot(log2(batch_size), cv_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+end
+
+ax = gca;
+% Pin the ticks to the plotted x positions so each label lands on its own batch size,
+% rather than on whichever ticks MATLAB would pick automatically.
+set (ax, 'XTick', log2(batch_size)', 'XTickLabel', num2cell(batch_size));
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ylim([0 0.31])
+legend({'1 GPU, TF32', '1 GPU, AMP', '8 GPUs, TF32', '8 GPUs, AMP'}, 'FontSize', 12, 'Location', 'northeast')
+xlabel('Batch size', 'FontSize', 14)
+ylabel('Coefficient of variation', 'FontSize', 14)
+exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_cv.png','Resolution',300)
+
+
+
+%% Figure 4
+% AMP mean throughput against batch size for 8 and 16 GPUs, each with an ideal scaling line.
+
+% NOTE(review): assumes the first 8 table rows hold one complete, increasing batch-size sweep.
+batch_size = benchmark_table.batch_size(1:8);
+
+close all
+figure('Position', [100 100 1200 600])
+
+for gpus = [8 16]
+    tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.gpus == gpus);
+    plot(log2(batch_size), tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+    hold on
+    % Ideal line: the smallest-batch throughput scaled by the batch-size ratio.
+    plot(log2(batch_size), tp_amp(1) * (batch_size' / batch_size(1)), '--*', 'LineWidth', 2)
+end
+
+ax = gca;
+% Pin the ticks to the plotted x positions so each label lands on its own batch size.
+set (ax, 'XTick', log2(batch_size)', 'XTickLabel', num2cell(batch_size));
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ax.YAxis.Exponent = 3;
+ylim([0 40260])
+legend({'8 GPU, AMP', '8 GPU, AMP ideal', '16 GPUs, AMP', '16 GPUs, AMP, ideal'}, 'FontSize', 12, 'Location', 'northwest')
+xlabel('Batch size', 'FontSize', 14)
+ylabel('Throughput (images/s)', 'FontSize', 14)
+exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_batch_size_ideal.png','Resolution',300)
+
-- 
GitLab