From c8c728b570ecefd7e554acaa8ac678cbb6021493 Mon Sep 17 00:00:00 2001 From: Xuan Gu <xuagu37@gmail.com> Date: Wed, 2 Nov 2022 10:38:03 +0100 Subject: [PATCH] Update benchmark_multi_node.sh --- scripts/benchmark_multi_node.sh | 35 ++++++++------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/scripts/benchmark_multi_node.sh b/scripts/benchmark_multi_node.sh index 1a327eb..c6ef585 100644 --- a/scripts/benchmark_multi_node.sh +++ b/scripts/benchmark_multi_node.sh @@ -1,34 +1,15 @@ #!/bin/bash #SBATCH -A nsc -#SBATCH --nodes=2 +#SBATCH --nodes=8 #SBATCH --gres=gpu:8 #SBATCH --ntasks-per-node=8 -#SBATCH --time=0-21:00:00 -#SBATCH --reservation=nsc-testing -#SBATCH -o benchmark_nodes2.out +#SBATCH --time=0-00:10:00 +#####SBATCH --reservation=bt-xuan_2nodes -cd /proj/nsc/users/xuan/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet +# For singularity +rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json +srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json" -dim=2 -nodes=2 - -for gpus in 8; do - - for batch_size in 128; do - - for iteration in {1..100}; do - - echo dim $dim, nodes $nodes, gpus $gpus, batch_size $batch_size, tf32, iteration $iteration - rm -f results/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_tf32_iteration${iteration}.json - srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus $gpus --dim $dim --batch_size $batch_size --nodes $nodes --logname="benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_tf32_iteration${iteration}.json" - - echo dim $dim, nodes $nodes, gpus $gpus, batch_size $batch_size, amp, iteration $iteration - rm -f results/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_amp_iteration${iteration}.json - srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus $gpus --dim $dim --batch_size $batch_size --nodes $nodes --amp --logname="benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_amp_iteration${iteration}.json" - - done - - done - -done +rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json +srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json" -- GitLab