From fe0aacd94c1cea578b7a41533239963fce3593df Mon Sep 17 00:00:00 2001 From: Xuan Gu <xuan.gu@liu.se> Date: Tue, 30 May 2023 14:13:40 +0000 Subject: [PATCH] Update 3 files - /scripts/run_benchmark.sh - /scripts/run_benchmark_single_node.sh - /scripts/run_bencmark_multi_node.sh --- scripts/run_benchmark.sh | 12 ++++-------- scripts/run_benchmark_single_node.sh | 13 +++---------- scripts/run_bencmark_multi_node.sh | 2 +- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/scripts/run_benchmark.sh b/scripts/run_benchmark.sh index 4a55f1a..f45cac5 100644 --- a/scripts/run_benchmark.sh +++ b/scripts/run_benchmark.sh @@ -12,15 +12,13 @@ for nodes in {1..1}; do # For single node if [ $nodes -eq 1 ]; then bash scripts/run_benchmark_single_node.sh ${dim} ${nodes} ${gpus} ${batch_size} ${iteration} - - sleep 1 # pause to be kind to the scheduler + sleep 1 else # For multi node gpus=8 bash scripts/run_benchmark_multi_node.sh ${dim} ${nodes} ${gpus} ${batch_size} ${iteration} - - sleep 1 # pause to be kind to the scheduler + sleep 1 fi done done @@ -38,15 +36,13 @@ for nodes in {1..1}; do # For single node if [ $nodes -eq 1 ]; then bash scripts/run_benchmark_single_node.sh ${dim} ${nodes} ${gpus} ${batch_size} ${iteration} - - sleep 1 # pause to be kind to the scheduler + sleep 1 else # For multi node gpus=8 bash scripts/run_benchmark_multi_node.sh ${dim} ${nodes} ${gpus} ${batch_size} ${iteration} - - sleep 1 # pause to be kind to the scheduler + sleep 1 fi done done diff --git a/scripts/run_benchmark_single_node.sh b/scripts/run_benchmark_single_node.sh index a52d110..c6e1fc5 100644 --- a/scripts/run_benchmark_single_node.sh +++ b/scripts/run_benchmark_single_node.sh @@ -1,13 +1,13 @@ #!/bin/bash -cat <<EOT > scripts/benchmark_single_node.sbatch +cat <<EOT > scripts/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.sbatch #!/bin/bash #SBATCH -A nsc #SBATCH --nodes=1 #SBATCH --gres=gpu:${3} #SBATCH --time=0-0:10:00 -#SBATCH -o sbatch_out/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.out +#SBATCH --output=sbatch_out/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.out #SBATCH --reservation=devel ###################21.11.0 @@ -28,13 +28,6 @@ apptainer exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-n #rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json #apptainer exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'" -# For enroot -#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json -#enroot start --rw --mount ${PWD}/data:/data --mount ${PWD}/results:/results nnunet bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'" - -#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json -#enroot start --rw --mount ${PWD}/data:/data --mount ${PWD}/results:/results nnunet bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'" - EOT -sbatch scripts/benchmark_single_node.sbatch \ No newline at end of file +#sbatch scripts/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.sbatch \ No newline at end of file diff --git a/scripts/run_bencmark_multi_node.sh b/scripts/run_bencmark_multi_node.sh index a94f0ac..9ff4ae4 100644 --- a/scripts/run_bencmark_multi_node.sh +++ b/scripts/run_bencmark_multi_node.sh @@ -8,7 +8,7 @@ cat <<EOT > scripts/benchmark_multi_node.sbatch #SBATCH --gres=gpu:8 #SBATCH --ntasks-per-node=8 #SBATCH --time=0-00:10:00 -#SBATCH -o sbatch_out/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.out +#SBATCH --output=sbatch_out/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.out #SBATCH --reservation=devel # For apptainer -- GitLab