Skip to content
Snippets Groups Projects
Commit dace0691 authored by Xuan Gu
Browse files

Update 2 files

- /scripts/benchmark_sbatch_submit.sh
- /scripts/benchmark_multi_node.sbatch
parent d7617681
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
# Submit one nnU-Net training benchmark run as a Slurm batch job.
#
# Positional arguments:
#   $1  dim         forwarded to benchmark.py as --dim
#   $2  nodes       used for the Slurm --nodes request and benchmark.py --nodes
#   $3  gpus        forwarded to benchmark.py as --gpus
#   $4  batch_size  forwarded to benchmark.py as --batch_size
#   $5  iteration   repetition index; only used in the result file name
#
# The here-document delimiter (EOT) is unquoted, so every ${N} and ${PWD}
# below is expanded by this wrapper at submission time; the job script that
# Slurm receives contains the literal values.
#
# Double quotes do not nest: the --logname value is wrapped in single quotes
# so that it stays inside the string handed to "bash -c".
if (( $# < 5 )); then
  echo "usage: $0 dim nodes gpus batch_size iteration" >&2
  exit 2
fi
sbatch <<EOT
#!/bin/bash
#SBATCH -A nsc
#SBATCH --nodes=${2}
#SBATCH --gres=gpu:8
#SBATCH --ntasks-per-node=8
#SBATCH --time=0-00:10:00
#####SBATCH --reservation=bt-xuan_2nodes
# For singularity
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
# srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json""
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
#srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json""
......@@ -24,9 +27,10 @@ srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidi
############## Running srun with pyxis works
# For enroot
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
#srun --container-image=/proj/nsc_testing/xuan/enroot/xuagu37+nvidia_nnu-net_for_pytorch+21.11.0.sqsh --container-name=nnunet --container-mounts=${PWD}/data:/data,${PWD}/results:/results --container-writable bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json""
# TF32 run: remove any stale result file, then launch the benchmark in the container.
# The logname is wrapped in single quotes because double quotes do not nest inside the bash -c string.
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
srun --container-image=/proj/nsc_testing/xuan/enroot/xuagu37+nvidia_nnu-net_for_pytorch+21.11.0.sqsh --container-name=nnunet --container-mounts=${PWD}/data:/data,${PWD}/results:/results --container-writable bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
#srun --container-image=/proj/nsc_testing/xuan/enroot/xuagu37+nvidia_nnu-net_for_pytorch+21.11.0.sqsh --container-name=nnunet --container-mounts=${PWD}/data:/data,${PWD}/results:/results --container-writable bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json""
# AMP run: same benchmark with the --amp flag, written to its own result file.
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
srun --container-image=/proj/nsc_testing/xuan/enroot/xuagu37+nvidia_nnu-net_for_pytorch+21.11.0.sqsh --container-name=nnunet --container-mounts=${PWD}/data:/data,${PWD}/results:/results --container-writable bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'"
EOT
\ No newline at end of file
......@@ -17,21 +17,17 @@ for gpus in {1..8}; do
done
done
# Multi-node sweep: for each node count from 2 to 8, submit 100 repetitions of
# the benchmark with 8 GPUs per node and a batch size of 128.
# ${dim} is not assigned here; presumably it comes from an enclosing loop.

# The -o files below are written under sbatch_out/, so make sure it exists
# before submitting anything.
mkdir -p sbatch_out

for nodes in {2..8}; do
  # A lone "{8}" is not a brace expansion (that needs a comma or ".."), so it
  # would be passed on as the literal string "{8}"; use the plain value.
  for gpus in 8; do
    for batch_size in 128; do
      for iteration in {1..100}; do
        echo "dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}"
        # For single node (disabled: every node count in this sweep is >= 2)
        #sbatch -o sbatch_out/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_iteration${iteration}.out benchmark_single_node.sbatch ${dim} ${nodes} ${gpus} ${batch_size} ${iteration}
        # For multi node
        sbatch -o "sbatch_out/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_iteration${iteration}.out" benchmark_multi_node.sbatch "${dim}" "${nodes}" "${gpus}" "${batch_size}" "${iteration}"
        sleep 1 # pause to be kind to the scheduler
      done
    done
  done
done
done
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment