From c8c728b570ecefd7e554acaa8ac678cbb6021493 Mon Sep 17 00:00:00 2001
From: Xuan Gu <xuagu37@gmail.com>
Date: Wed, 2 Nov 2022 10:38:03 +0100
Subject: [PATCH] Update benchmark_multi_node.sh

---
 scripts/benchmark_multi_node.sh | 35 ++++++++-------------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/scripts/benchmark_multi_node.sh b/scripts/benchmark_multi_node.sh
index 1a327eb..c6ef585 100644
--- a/scripts/benchmark_multi_node.sh
+++ b/scripts/benchmark_multi_node.sh
@@ -1,34 +1,15 @@
 #!/bin/bash
 
 #SBATCH -A nsc
-#SBATCH --nodes=2
+#SBATCH --nodes=8
 #SBATCH --gres=gpu:8
 #SBATCH --ntasks-per-node=8
-#SBATCH --time=0-21:00:00
-#SBATCH --reservation=nsc-testing
-#SBATCH -o benchmark_nodes2.out
+#SBATCH --time=0-00:10:00
+#####SBATCH --reservation=bt-xuan_2nodes
 
-cd /proj/nsc/users/xuan/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet
+# Singularity, tf32-labelled run (no --amp). Args: $1=dim $2=nodes $3=gpus $4=batch_size $5=iteration. Removes the same-named file under results/ first (presumably where benchmark.py writes --logname; confirm), aborting early if an argument is missing.
+rm -f -- "results/benchmark_dim${1:?missing dim (arg 1)}_nodes${2:?missing nodes (arg 2)}_gpus${3:?missing gpus (arg 3)}_batchsize${4:?missing batch_size (arg 4)}_tf32_iteration${5:?missing iteration (arg 5)}.json"
+srun singularity exec --nv -B "${PWD}/data:/data" -B "${PWD}/results:/results" nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus "${3}" --dim "${1}" --batch_size "${4}" --nodes "${2}" --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json"
 
-dim=2
-nodes=2
-
-for gpus in 8; do
-
-    for batch_size in 128; do
-
-        for iteration in {1..100}; do
-
-            echo dim $dim, nodes $nodes, gpus $gpus, batch_size $batch_size, tf32, iteration $iteration
-            rm -f results/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_tf32_iteration${iteration}.json
-            srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus $gpus --dim $dim --batch_size $batch_size --nodes $nodes --logname="benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_tf32_iteration${iteration}.json"
-
-            echo dim $dim, nodes $nodes, gpus $gpus, batch_size $batch_size, amp, iteration $iteration
-            rm -f results/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_amp_iteration${iteration}.json
-            srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus $gpus --dim $dim --batch_size $batch_size --nodes $nodes --amp --logname="benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_amp_iteration${iteration}.json"
-
-        done
-
-    done
-
-done
+rm -f -- "results/benchmark_dim${1:?missing dim (arg 1)}_nodes${2:?missing nodes (arg 2)}_gpus${3:?missing gpus (arg 3)}_batchsize${4:?missing batch_size (arg 4)}_amp_iteration${5:?missing iteration (arg 5)}.json"  # --amp run: remove the same-named file under results/ first; abort if an argument ($1=dim $2=nodes $3=gpus $4=batch_size $5=iteration) is missing
+srun singularity exec --nv -B "${PWD}/data:/data" -B "${PWD}/results:/results" nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus "${3}" --dim "${1}" --batch_size "${4}" --nodes "${2}" --amp --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json"
-- 
GitLab