From 4998c1ec76dd2bc198968fb4775819c32b7ad4f8 Mon Sep 17 00:00:00 2001
From: Xuan Gu <xuan.gu@liu.se>
Date: Wed, 25 Jan 2023 11:15:08 +0000
Subject: [PATCH] Upload New File

---
 scripts/benchmark_plot.m | 198 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 scripts/benchmark_plot.m

diff --git a/scripts/benchmark_plot.m b/scripts/benchmark_plot.m
new file mode 100644
index 0000000..63d992c
--- /dev/null
+++ b/scripts/benchmark_plot.m
@@ -0,0 +1,198 @@
+% BENCHMARK_PLOT  Aggregate benchmark throughput and export scaling plots.
+%
+% For every (nodes, gpus, batch size) combination the script passes the path of
+% each per-iteration TF32 and AMP .json result file to get_throughput, reduces
+% the iterations to mean/min/max/std/cv, and exports four PNG figures.
+% get_throughput is not defined in this file; it is expected on the MATLAB path.
+
+close all
+clear
+
+addpath(genpath('/home/xuagu37/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet'));
+addpath(genpath('/home/xuagu37/OneDrive/ngc'));
+
+%% Settings
+
+dim = 2;
+n_nodes = 8;
+gpus_per_node = 8;
+batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128];
+n_iterations = 100;
+result_root = '/home/xuagu37/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet/results_20221101/';
+figure_root = '/home/xuagu37/OneDrive/ngc/';
+
+% Batch sizes are plotted on a log2 axis; ticks and labels are fixed up front.
+log2_batch = log2(batch_sizes);
+batch_labels = compose('%d', batch_sizes);
+
+% Row of summary statistics (mean, min, max, std, cv) for one set of repeats.
+summarise = @(x) [mean(x), min(x), max(x), std(x), std(x)/mean(x)];
+
+%% Load data
+
+benchmark = zeros(n_nodes*gpus_per_node*numel(batch_sizes), 14);
+row = 0;
+
+for nodes = 1 : n_nodes
+    for gpus = 1 : gpus_per_node
+        for batch_size = batch_sizes
+
+            % Path stem shared by every iteration of this configuration.
+            stem = [result_root, 'benchmark_dim', num2str(dim), '_nodes', num2str(nodes), ...
+                '_gpus', num2str(gpus), '_batchsize', num2str(batch_size)];
+
+            throughput_tf32 = zeros(1, n_iterations);
+            throughput_amp = zeros(1, n_iterations);
+            for iteration = 1 : n_iterations
+                throughput_tf32(iteration) = get_throughput([stem, '_tf32_iteration', num2str(iteration), '.json']);
+                throughput_amp(iteration) = get_throughput([stem, '_amp_iteration', num2str(iteration), '.json']);
+            end
+
+            row = row + 1;
+            benchmark(row, :) = [dim, nodes, gpus, batch_size, ...
+                summarise(throughput_tf32), summarise(throughput_amp)];
+        end
+    end
+end
+
+col_names = {'dim', 'nodes', 'gpus', 'batch_size', ...
+    'throughput_tf32_mean', 'throughput_tf32_min', 'throughput_tf32_max', 'throughput_tf32_std', 'throughput_tf32_cv', ...
+    'throughput_amp_mean', 'throughput_amp_min', 'throughput_amp_max', 'throughput_amp_std', 'throughput_amp_cv'};
+benchmark_table = array2table(benchmark, 'VariableNames', col_names);
+
+% Drop configurations whose mean TF32 throughput is zero (presumably runs
+% without a result file - confirm against get_throughput).
+benchmark_table(benchmark_table.throughput_tf32_mean==0, :) = [];
+
+% Convert the per-node GPU count into the total GPU count. This has to happen
+% exactly once: rescaling the multi-node rows to nodes*8 first and then adding
+% the node offset as well gives (2*nodes - 1)*8, so no row ever reaches a total
+% of 16 GPUs and the 16-GPU selections in Figures 2 and 4 come back empty.
+benchmark_table.gpus = (benchmark_table.nodes - 1)*gpus_per_node + benchmark_table.gpus;
+
+
+%% Figure 1
+
+% Assumes the table holds exactly these total GPU counts for batch size 128
+% (multi-node results only for 8 GPUs per node) - confirm against the results.
+gpus = [1 : 8, 16:8:64];
+batch_size = 128;
+
+close all
+figure('Position', [100 100 1200 600])
+
+tp_tf32 = benchmark_table.throughput_tf32_mean(benchmark_table.batch_size == batch_size);
+tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.batch_size == batch_size);
+
+% Measured curves plus the single-GPU throughput scaled linearly with GPU count.
+plot(gpus, tp_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
+hold on
+plot(gpus, tp_tf32(1)*gpus, '--*', 'LineWidth', 2)
+plot(gpus, tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+plot(gpus, tp_amp(1)*gpus, '--*', 'LineWidth', 2)
+
+ax = gca;
+ax.XTick = gpus;
+% Ticks 1-7 stay unlabelled; ticks 8, 16, ..., 64 carry the labels 1-8.
+ax.XTickLabel = {'', '', '', '', '', '', '', ...
+    '1', '2', '3', '4', '5', '6', '7', '8'};
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ax.YAxis.Exponent = 3;
+ylim([0 72000])
+xlim([1 64])
+
+legend({'Batch size 128, TF32', 'Batch size 128, TF32, ideal', 'Batch size 128, AMP', 'Batch size 128, AMP, ideal'}, 'FontSize', 12, 'Location', 'northwest')
+xlabel('Nodes', 'FontSize', 14)
+ylabel('Throughput (images/s)', 'FontSize', 14)
+exportgraphics(ax, [figure_root, 'benchmark_throughput_gpus_ideal.png'], 'Resolution', 300)
+
+
+%% Figure 2
+
+close all
+figure('Position', [100 100 1200 600])
+
+for gpus = [1 8 16]
+
+    tp_tf32 = benchmark_table.throughput_tf32_mean(benchmark_table.gpus == gpus);
+    tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.gpus == gpus);
+
+    plot(log2_batch, tp_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
+    hold on
+    plot(log2_batch, tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+
+end
+
+ax = gca;
+% Pin the ticks to the plotted positions so every label sits on its batch size.
+ax.XTick = log2_batch;
+ax.XTickLabel = batch_labels;
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ax.YAxis.Exponent = 3;
+ylim([0 14320])
+
+legend({'1 GPU, TF32', '1 GPU, AMP', '8 GPUs, TF32', '8 GPUs, AMP', '16 GPUs, TF32', '16 GPUs, AMP'}, 'FontSize', 12, 'Location', 'northwest')
+xlabel('Batch size', 'FontSize', 14)
+ylabel('Throughput (images/s)', 'FontSize', 14)
+exportgraphics(ax, [figure_root, 'benchmark_throughput_batch_size.png'], 'Resolution', 300)
+
+
+%% Figure 3
+
+close all
+figure('Position', [100 100 1200 600])
+
+for gpus = [1 8]
+
+    cv_tf32 = benchmark_table.throughput_tf32_cv(benchmark_table.gpus == gpus);
+    cv_amp = benchmark_table.throughput_amp_cv(benchmark_table.gpus == gpus);
+
+    plot(log2_batch, cv_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
+    hold on
+    plot(log2_batch, cv_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+
+end
+
+ax = gca;
+ax.XTick = log2_batch;
+ax.XTickLabel = batch_labels;
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ylim([0 0.31])
+
+legend({'1 GPU, TF32', '1 GPU, AMP', '8 GPUs, TF32', '8 GPUs, AMP'}, 'FontSize', 12, 'Location', 'northeast')
+xlabel('Batch size', 'FontSize', 14)
+ylabel('Coefficient of variation', 'FontSize', 14)
+exportgraphics(ax, [figure_root, 'benchmark_throughput_cv.png'], 'Resolution', 300)
+
+
+%% Figure 4
+
+close all
+figure('Position', [100 100 1200 600])
+
+for gpus = [8 16]
+
+    tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.gpus == gpus);
+
+    plot(log2_batch, tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
+    hold on
+    % Ideal curve: batch-size-1 throughput scaled linearly with batch size.
+    plot(log2_batch, tp_amp(1)*batch_sizes, '--*', 'LineWidth', 2)
+
+end
+
+ax = gca;
+ax.XTick = log2_batch;
+ax.XTickLabel = batch_labels;
+ax.XAxis.FontSize = 12;
+ax.YAxis.FontSize = 12;
+ax.YAxis.Exponent = 3;
+ylim([0 40260])
+
+legend({'8 GPU, AMP', '8 GPU, AMP ideal', '16 GPUs, AMP', '16 GPUs, AMP, ideal'}, 'FontSize', 12, 'Location', 'northwest')
+xlabel('Batch size', 'FontSize', 14)
+ylabel('Throughput (images/s)', 'FontSize', 14)
+exportgraphics(ax, [figure_root, 'benchmark_throughput_batch_size_ideal.png'], 'Resolution', 300)
-- 
GitLab