From e2c7b9fa692ded404fe670f8afcc04606977da9c Mon Sep 17 00:00:00 2001
From: Xuan Gu <xuan.gu@liu.se>
Date: Thu, 28 Sep 2023 09:52:40 +0000
Subject: [PATCH] Update 3 files

- /PyTorch/Segmentation/nnUNet/benchmark.txt
- /PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
- /PyTorch/Segmentation/nnUNet/run_benchmark.sh
---
 PyTorch/Segmentation/nnUNet/benchmark.txt     |  1 -
 PyTorch/Segmentation/nnUNet/run_benchmark.sh  | 26 ++++++++++++
 .../nnUNet/run_benchmark_single_node.sh       | 54 +++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 PyTorch/Segmentation/nnUNet/run_benchmark.sh
 create mode 100644 PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh

diff --git a/PyTorch/Segmentation/nnUNet/benchmark.txt b/PyTorch/Segmentation/nnUNet/benchmark.txt
index b11aada..7ee89df 100644
--- a/PyTorch/Segmentation/nnUNet/benchmark.txt
+++ b/PyTorch/Segmentation/nnUNet/benchmark.txt
@@ -2,7 +2,6 @@
 
 MODULE_NAME=nnunet_for_pytorch
 MODULE_VERSION=21.11.0
-
 WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
 CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif
 
diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark.sh b/PyTorch/Segmentation/nnUNet/run_benchmark.sh
new file mode 100644
index 0000000..4bdcb51
--- /dev/null
+++ b/PyTorch/Segmentation/nnUNet/run_benchmark.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Sweep driver for the nnU-Net training benchmark: for every combination of
+# nodes, GPUs, batch size and iteration listed below, call
+# run_benchmark_single_node.sh, which writes and submits one Slurm job.
+set -euo pipefail
+
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
+
+dim=2
+for nodes in {1..1}; do
+    for gpus in {1,8}; do
+        # A plain word, not "{128}": braces without "," or ".." are not
+        # expanded by bash and would be passed on literally.
+        for batch_size in 128; do
+            for iteration in {1..1}; do
+
+                echo "dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}"
+
+                # For single node
+                bash "${WORK_DIR}/run_benchmark_single_node.sh" "${dim}" "${nodes}" "${gpus}" "${batch_size}" "${iteration}"
+                sleep 1
+
+            done
+        done
+    done
+done
diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
new file mode 100644
index 0000000..47b800b
--- /dev/null
+++ b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Write a Slurm batch script for one benchmark configuration and submit it.
+# The job runs scripts/benchmark.py in train mode twice, once without and
+# once with --amp; the two runs log to *_tf32_* and *_amp_* files.
+#
+# Usage: run_benchmark_single_node.sh DIM NODES GPUS BATCH_SIZE ITERATION
+set -euo pipefail
+
+if [[ $# -ne 5 ]]; then
+    echo "Usage: ${0##*/} DIM NODES GPUS BATCH_SIZE ITERATION" >&2
+    exit 2
+fi
+
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet
+run_name=benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}
+job_name=${run_name}_iteration${5}
+
+# scripts/ and sbatch_out/ are relative paths: anchor them at WORK_DIR and
+# create them before submitting, because Slurm does not create the directory
+# named in --output.
+cd "${WORK_DIR}"
+mkdir -p scripts sbatch_out
+
+# The heredoc delimiter is unquoted: ${...} is substituted while the file is
+# written, whereas \${...} ends up as a literal ${...} that the job expands.
+cat <<EOT > "scripts/${job_name}.sbatch"
+#!/bin/bash
+
+#SBATCH -A nsc
+#SBATCH --nodes=${2}
+#SBATCH --gpus=${3}
+#SBATCH --time=0-0:10:00
+#SBATCH --output=sbatch_out/${job_name}.out
+#SBATCH --reservation=devel
+
+MODULE_NAME=nnunet_for_pytorch
+MODULE_VERSION=21.11.0
+WORK_DIR=${WORK_DIR}
+# NOTE(review): CONTAINER_DIR is not used below; the srun lines name a
+# relative .sif file instead. Confirm which image is intended.
+CONTAINER_DIR=/proj/nsc_testing/xuan/containers/\${MODULE_NAME}_\${MODULE_VERSION}.sif
+
+mkdir -p "\${WORK_DIR}/sbatch_out" "\${WORK_DIR}/benchmark_results"
+
+cd "\${WORK_DIR}" || exit 1
+rm -f "benchmark_results/${run_name}_tf32_iteration${5}.json"
+srun apptainer exec --nv -B "\${PWD}/data:/data" -B "\${PWD}/results:/results" nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='${run_name}_tf32_iteration${5}.json'"
+
+rm -f "benchmark_results/${run_name}_amp_iteration${5}.json"
+srun apptainer exec --nv -B "\${PWD}/data:/data" -B "\${PWD}/results:/results" nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='${run_name}_amp_iteration${5}.json'"
+
+EOT
+
+sbatch "scripts/${job_name}.sbatch"
-- 
GitLab