NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. The
position of the blank context lines in the benchmark.txt hunk is reconstructed
(confirm against the real file), and the "index" hashes below predate the
script fixes made in this revision.

diff --git a/PyTorch/Segmentation/nnUNet/benchmark.txt b/PyTorch/Segmentation/nnUNet/benchmark.txt
index b11aada6fe687d311874327ea3fe0d851468fe00..7ee89dfc9857a996e31add10fece2aeaae500454 100644
--- a/PyTorch/Segmentation/nnUNet/benchmark.txt
+++ b/PyTorch/Segmentation/nnUNet/benchmark.txt
@@ -2,7 +2,6 @@
 
 MODULE_NAME=nnunet_for_pytorch
 MODULE_VERSION=21.11.0
-
 WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
 CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif
 
diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark.sh b/PyTorch/Segmentation/nnUNet/run_benchmark.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4bdcb5117a0b2d25be9fe72a80b5659eceae8bca
--- /dev/null
+++ b/PyTorch/Segmentation/nnUNet/run_benchmark.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Submit one Slurm benchmark job per (nodes, gpus, batch_size, iteration)
+# combination by calling run_benchmark_single_node.sh for each of them.
+#
+# NOTE(review): the helper writes to scripts/ and sbatch_out/ relative to the
+# current directory, so this is presumably meant to be started from WORK_DIR.
+set -e
+
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
+
+dim=2
+for nodes in {1..1}; do
+  for gpus in {1,8}; do
+    # A plain word is used here: bash leaves a one-element brace such as
+    # {128} unexpanded, which would pass the literal string "{128}" along.
+    for batch_size in 128; do
+      for iteration in {1..1}; do
+
+        echo "dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}"
+
+        # For single node
+        bash "$WORK_DIR/run_benchmark_single_node.sh" "${dim}" "${nodes}" "${gpus}" "${batch_size}" "${iteration}"
+        sleep 1
+
+      done
+    done
+  done
+done
diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
new file mode 100644
index 0000000000000000000000000000000000000000..47b800b334110e6f91738e7b5146e279062c006a
--- /dev/null
+++ b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Generate and submit one Slurm batch job that benchmarks nnU-Net training.
+#
+# Usage: run_benchmark_single_node.sh DIM NODES GPUS BATCH_SIZE ITERATION
+#
+# The sbatch file is written to scripts/ and the job output to sbatch_out/,
+# both relative to the current directory. The job runs the benchmark twice:
+# once without --amp (log name tagged "tf32") and once with --amp.
+set -euo pipefail
+
+if [[ $# -ne 5 ]]; then
+  echo "Usage: ${0##*/} DIM NODES GPUS BATCH_SIZE ITERATION" >&2
+  exit 2
+fi
+
+job_name=benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}
+
+# scripts/ must exist for the redirect below, and Slurm does not create the
+# directory of the --output file, so both are created before submitting.
+mkdir -p scripts sbatch_out
+
+# The here-document delimiter is unquoted on purpose: ${1}..${5} and
+# ${job_name} are filled in now. Variables the job has to resolve itself are
+# written as \$VAR so that they reach the sbatch file unexpanded.
+cat <<EOT > "scripts/${job_name}.sbatch"
+#!/bin/bash
+
+#SBATCH -A nsc
+#SBATCH --nodes=${2}
+#SBATCH --gpus=${3}
+#SBATCH --time=0-0:10:00
+#SBATCH --output=sbatch_out/${job_name}.out
+#SBATCH --reservation=devel
+
+MODULE_NAME=nnunet_for_pytorch
+MODULE_VERSION=21.11.0
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
+CONTAINER_DIR=/proj/nsc_testing/xuan/containers/\${MODULE_NAME}_\${MODULE_VERSION}.sif
+
+mkdir -p "\$WORK_DIR/sbatch_out" "\$WORK_DIR/benchmark_results"
+
+cd "\$WORK_DIR" || exit 1
+rm -f benchmark_results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
+srun apptainer exec --nv -B \${PWD}/data:/data -B \${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
+
+rm -f benchmark_results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
+srun apptainer exec --nv -B \${PWD}/data:/data -B \${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'"
+
+
+EOT
+
+sbatch "scripts/${job_name}.sbatch"