Skip to content
Snippets Groups Projects
Commit 4998c1ec authored by Xuan Gu's avatar Xuan Gu
Browse files

Upload New File

parent d504ce10
No related branches found
No related tags found
No related merge requests found
% Start from a clean session: close every open figure and wipe the workspace.
close all
clear
% Put both project trees (including all of their subfolders) on the MATLAB
% search path, in the same order as before: nnUNet example tree first, then
% the OneDrive folder.
cellfun(@(root_dir) addpath(genpath(root_dir)), { ...
    '/home/xuagu37/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet', ...
    '/home/xuagu37/OneDrive/ngc'});
%% Load data
% Build one summary row per (nodes, gpus, batch_size) configuration.
% For every configuration, 100 result files are read for each precision mode
% (TF32 and AMP) through get_throughput (defined outside this script), and the
% mean / min / max / std / coefficient of variation of the 100 values are kept.
results_root = '/home/xuagu37/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet/results_20221101/';
dim = 2;
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128];
n_iterations = 100;

% Preallocate: 8 node counts x 8 per-node GPU counts x 8 batch sizes, 14 columns.
benchmark = zeros(8 * 8 * numel(batch_sizes), 14);
row = 0;
for nodes = 1 : 8
    for gpus = 1 : 8
        for batch_size = batch_sizes
            % Everything in the file name except the precision tag and the
            % iteration number is constant for this configuration.
            config_prefix = [results_root, 'benchmark_dim', num2str(dim), ...
                '_nodes', num2str(nodes), '_gpus', num2str(gpus), ...
                '_batchsize', num2str(batch_size)];
            throughput_tf32 = zeros(1, n_iterations);
            throughput_amp = zeros(1, n_iterations);
            for iteration = 1 : n_iterations
                iteration_suffix = ['_iteration', num2str(iteration), '.json'];
                throughput_tf32(iteration) = get_throughput([config_prefix, '_tf32', iteration_suffix]);
                throughput_amp(iteration) = get_throughput([config_prefix, '_amp', iteration_suffix]);
            end

            throughput_tf32_mean = mean(throughput_tf32);
            throughput_tf32_min = min(throughput_tf32);
            throughput_tf32_max = max(throughput_tf32);
            throughput_tf32_std = std(throughput_tf32);
            % Coefficient of variation: spread relative to the mean.
            throughput_tf32_cv = throughput_tf32_std/throughput_tf32_mean;

            throughput_amp_mean = mean(throughput_amp);
            throughput_amp_min = min(throughput_amp);
            throughput_amp_max = max(throughput_amp);
            throughput_amp_std = std(throughput_amp);
            throughput_amp_cv = throughput_amp_std/throughput_amp_mean;

            row = row + 1;
            benchmark(row, :) = [dim, nodes, gpus, batch_size, ...
                throughput_tf32_mean, throughput_tf32_min, throughput_tf32_max, throughput_tf32_std, throughput_tf32_cv, ...
                throughput_amp_mean, throughput_amp_min, throughput_amp_max, throughput_amp_std, throughput_amp_cv];
        end
    end
end

col_names = {'dim', 'nodes', 'gpus', 'batch_size', ...
    'throughput_tf32_mean', 'throughput_tf32_min', 'throughput_tf32_max', 'throughput_tf32_std', 'throughput_tf32_cv', ...
    'throughput_amp_mean', 'throughput_amp_min', 'throughput_amp_max', 'throughput_amp_std', 'throughput_amp_cv'};
benchmark_table = array2table(benchmark, 'VariableNames', col_names);

% Drop configurations whose mean TF32 throughput is exactly zero.
% NOTE(review): presumably get_throughput returns 0 when a result file is
% missing, so this removes configurations that were never run - confirm.
benchmark_table(benchmark_table.throughput_tf32_mean==0, :) = [];

% Convert the per-node GPU count into a total GPU count, exactly once.
% The previous version first overwrote the column with nodes*8 for multi-node
% rows and then added (nodes-1)*8 on top, which double-counted the extra nodes
% (e.g. 2 nodes x 8 GPUs became 24 instead of 16) and relied on multiplying a
% table by a scalar, which is not available in every MATLAB release.
benchmark_table.gpus = (benchmark_table.nodes - 1)*8 + benchmark_table.gpus;
%% Figure 1
% Scaling plot at batch size 128: measured mean throughput for TF32 and AMP,
% each accompanied by a straight-line extrapolation of its first data point
% (first measured value multiplied by the GPU count).
gpus = [1 : 8, 16:8:64];
close all
figure('Position', [100 100 1200 600])

batch_size = 128;
is_selected = benchmark_table.batch_size == batch_size;
tp_tf32 = benchmark_table.throughput_tf32_mean(is_selected);
tp_amp = benchmark_table.throughput_amp_mean(is_selected);

% Shared line styling: measured curves carry large markers, ideal curves are dashed.
measured_style = {'LineWidth', 2, 'MarkerSize', 10};
ideal_style = {'--*', 'LineWidth', 2};
plot(gpus, tp_tf32, '-x', measured_style{:})
hold on
plot(gpus, tp_tf32(1)*gpus, ideal_style{:})
plot(gpus, tp_amp, '-s', measured_style{:})
plot(gpus, tp_amp(1)*gpus, ideal_style{:})

ax = gca;
% One tick per plotted GPU count. The first seven ticks stay unlabelled; the
% ticks at 8, 16, ..., 64 are labelled 1 through 8 to match the 'Nodes' axis title.
ax.XTick = gpus;
ax.XTickLabel = [repmat({''}, 1, 7), {'1', '2', '3', '4', '5', '6', '7', '8'}];
set(ax.XAxis, 'FontSize', 12);
set(ax.YAxis, 'FontSize', 12);
set(ax.YAxis, 'Exponent', 3);
ylim([0 72000])
xlim([1 64])

legend_entries = {'Batch size 128, TF32', 'Batch size 128, TF32, ideal', 'Batch size 128, AMP', 'Batch size 128, AMP, ideal'};
legend(legend_entries, 'FontSize', 12, 'Location', 'northwest')
xlabel('Nodes', 'FontSize', 14)
ylabel('Throughput (images/s)', 'FontSize', 14)

% Write the axes to disk as a 300-dpi PNG.
exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_gpus_ideal.png','Resolution',300)
%% Figure 2
% Mean throughput versus batch size (log2-spaced x-axis) for 1, 8 and 16 GPUs,
% with one TF32 curve and one AMP curve per GPU count.
% The batch sizes are taken from the first eight rows of the table.
batch_size = benchmark_table.batch_size(1:8);
close all
figure('Position', [100 100 1200 600])
for gpus = [1 8 16]
    tp_tf32 = benchmark_table.throughput_tf32_mean(benchmark_table.gpus == gpus);
    tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.gpus == gpus);
    plot(log2(batch_size), tp_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
    hold on
    plot(log2(batch_size), tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
end
ax = gca;
% Pin the tick positions to the plotted x-values before labelling them.
% Without an explicit XTick the labels are attached to whatever ticks MATLAB
% chooses automatically, which is only correct if those happen to be 0:7.
set (ax, 'XTick', log2(batch_size).');
set (ax, 'XTickLabel', num2cell(batch_size));
ax.XAxis.FontSize = 12;
ax.YAxis.FontSize = 12;
ax.YAxis.Exponent = 3;
ylim([0 14320])
legend({'1 GPU, TF32', '1 GPU, AMP', '8 GPUs, TF32', '8 GPUs, AMP', '16 GPUs, TF32', '16 GPUs, AMP'}, 'FontSize', 12, 'Location', 'northwest')
xlabel('Batch size', 'FontSize', 14)
ylabel('Throughput (images/s)', 'FontSize', 14)
% Write the axes to disk as a 300-dpi PNG.
exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_batch_size.png','Resolution',300)
%% Figure 3
% Coefficient of variation of the throughput versus batch size (log2-spaced
% x-axis) for 1 and 8 GPUs, with one TF32 curve and one AMP curve per GPU count.
% The batch sizes are taken from the first eight rows of the table.
batch_size = benchmark_table.batch_size(1:8);
close all
figure('Position', [100 100 1200 600])
for gpus = [1 8]
    cv_tf32 = benchmark_table.throughput_tf32_cv(benchmark_table.gpus == gpus);
    cv_amp = benchmark_table.throughput_amp_cv(benchmark_table.gpus == gpus);
    plot(log2(batch_size), cv_tf32, '-x', 'LineWidth', 2, 'MarkerSize', 10)
    hold on
    plot(log2(batch_size), cv_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
end
ax = gca;
% Pin the tick positions to the plotted x-values before labelling them.
% Without an explicit XTick the labels are attached to whatever ticks MATLAB
% chooses automatically, which is only correct if those happen to be 0:7.
set (ax, 'XTick', log2(batch_size).');
set (ax, 'XTickLabel', num2cell(batch_size));
ax.XAxis.FontSize = 12;
ax.YAxis.FontSize = 12;
ylim([0 0.31])
legend({'1 GPU, TF32', '1 GPU, AMP', '8 GPUs, TF32', '8 GPUs, AMP'}, 'FontSize', 12, 'Location', 'northeast')
xlabel('Batch size', 'FontSize', 14)
ylabel('Coefficient of variation', 'FontSize', 14)
% Write the axes to disk as a 300-dpi PNG.
exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_cv.png','Resolution',300)
%% Figure 4
% Mean AMP throughput versus batch size (log2-spaced x-axis) for 8 and 16 GPUs.
% Each measured curve is paired with an "ideal" curve: the throughput of the
% first data point multiplied by 1, 2, 4, ..., 128.
% The batch sizes are taken from the first eight rows of the table.
batch_size = benchmark_table.batch_size(1:8);
close all
figure('Position', [100 100 1200 600])
for gpus = [8 16]
    tp_amp = benchmark_table.throughput_amp_mean(benchmark_table.gpus == gpus);
    plot(log2(batch_size), tp_amp, '-s', 'LineWidth', 2, 'MarkerSize', 10)
    hold on
    plot(log2(batch_size), tp_amp(1)*[1, 2, 4, 8, 16, 32, 64, 128], '--*', 'LineWidth', 2)
end
ax = gca;
% Pin the tick positions to the plotted x-values before labelling them.
% Without an explicit XTick the labels are attached to whatever ticks MATLAB
% chooses automatically, which is only correct if those happen to be 0:7.
set (ax, 'XTick', log2(batch_size).');
set (ax, 'XTickLabel', num2cell(batch_size));
ax.XAxis.FontSize = 12;
ax.YAxis.FontSize = 12;
ax.YAxis.Exponent = 3;
ylim([0 40260])
legend({'8 GPU, AMP', '8 GPU, AMP ideal', '16 GPUs, AMP', '16 GPUs, AMP, ideal'}, 'FontSize', 12, 'Location', 'northwest')
xlabel('Batch size', 'FontSize', 14)
ylabel('Throughput (images/s)', 'FontSize', 14)
% Write the axes to disk as a 300-dpi PNG.
exportgraphics(ax,'/home/xuagu37/OneDrive/ngc/benchmark_throughput_batch_size_ideal.png','Resolution',300)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment