From e2c7b9fa692ded404fe670f8afcc04606977da9c Mon Sep 17 00:00:00 2001
From: Xuan Gu <xuan.gu@liu.se>
Date: Thu, 28 Sep 2023 09:52:40 +0000
Subject: [PATCH] Update 3 files

- /PyTorch/Segmentation/nnUNet/benchmark.txt
- /PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
- /PyTorch/Segmentation/nnUNet/run_benchmark.sh
---
 PyTorch/Segmentation/nnUNet/benchmark.txt     |  1 -
 PyTorch/Segmentation/nnUNet/run_benchmark.sh  | 21 +++++++++++++
 .../nnUNet/run_benchmark_single_node.sh       | 30 +++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 PyTorch/Segmentation/nnUNet/run_benchmark.sh
 create mode 100644 PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh

diff --git a/PyTorch/Segmentation/nnUNet/benchmark.txt b/PyTorch/Segmentation/nnUNet/benchmark.txt
index b11aada..7ee89df 100644
--- a/PyTorch/Segmentation/nnUNet/benchmark.txt
+++ b/PyTorch/Segmentation/nnUNet/benchmark.txt
@@ -2,7 +2,6 @@
 
 MODULE_NAME=nnunet_for_pytorch
 MODULE_VERSION=21.11.0
-
 WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
 CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif
 
diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark.sh b/PyTorch/Segmentation/nnUNet/run_benchmark.sh
new file mode 100644
index 0000000..4bdcb51
--- /dev/null
+++ b/PyTorch/Segmentation/nnUNet/run_benchmark.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Sweep the benchmark grid and submit one Slurm job per (nodes, gpus, batch_size, iteration).
+set -e
+
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
+cd "$WORK_DIR"  # run_benchmark_single_node.sh writes to ./scripts relative to the cwd
+dim=2
+for nodes in {1..1}; do
+    for gpus in 1 8; do
+        # Plain word list: "{128}" has no comma or "..", so bash would leave it as the literal string.
+        for batch_size in 128; do
+            for iteration in {1..1}; do
+                echo "dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}"
+
+                # For single node: generate the sbatch file for this configuration and submit it.
+                bash "$WORK_DIR/run_benchmark_single_node.sh" "${dim}" "${nodes}" "${gpus}" "${batch_size}" "${iteration}"
+                sleep 1
+            done
+        done
+    done
+done
diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
new file mode 100644
index 0000000..47b800b
--- /dev/null
+++ b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Generate a Slurm batch script for one nnUNet benchmark configuration and submit it.
+# Usage: run_benchmark_single_node.sh DIM NODES GPUS BATCH_SIZE ITERATION   (run from WORK_DIR)
+set -eu
+[ "$#" -ge 5 ] || { echo "usage: $0 DIM NODES GPUS BATCH_SIZE ITERATION" >&2; exit 2; }
+job=benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}
+# Slurm does not create the --output directory itself, so make sure it exists before submitting.
+mkdir -p sbatch_out
+# Unquoted heredoc: ${1}..${5} and ${job} expand now; \$-escaped variables are expanded by the job.
+cat <<EOT > "scripts/${job}.sbatch"
+#!/bin/bash
+#SBATCH -A nsc
+#SBATCH --nodes=${2}
+#SBATCH --gpus=${3}
+#SBATCH --time=0-0:10:00
+#SBATCH --output=sbatch_out/${job}.out
+#SBATCH --reservation=devel
+
+MODULE_NAME=nnunet_for_pytorch
+MODULE_VERSION=21.11.0
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
+CONTAINER_DIR=/proj/nsc_testing/xuan/containers/\${MODULE_NAME}_\${MODULE_VERSION}.sif
+mkdir -p "\$WORK_DIR/sbatch_out" "\$WORK_DIR/benchmark_results"
+cd "\$WORK_DIR" || exit 1
+rm -f benchmark_results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
+srun apptainer exec --nv -B \${PWD}/data:/data -B \${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
+rm -f benchmark_results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
+srun apptainer exec --nv -B \${PWD}/data:/data -B \${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'"
+EOT
+sbatch "scripts/${job}.sbatch"
\ No newline at end of file
-- 
GitLab