diff --git a/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh index 8e7ba99f0589ddaab3380c44714cb627b4c8ae05..5e632665b34c2a9ad6985483e6ff9a4df089a91e 100644 --- a/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh +++ b/PyTorch/Segmentation/nnUNet/run_benchmark_single_node.sh @@ -4,9 +4,9 @@ MODULE_NAME=nnunet_for_pytorch MODULE_VERSION=21.11.0 WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif -SBATCH_DIR=$WORK_DIR/sbatch_scripts/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.sbatch -SBATCH_OUT_DIR=$WORK_DIR/sbatch_out/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.out -LOG_DIR=benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json +SBATCH_DIR=$WORK_DIR/sbatch_scripts/benchmark_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}.sbatch +SBATCH_OUT_DIR=$WORK_DIR/sbatch_out/benchmark_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}.out +LOG_DIR=benchmark_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp.json mkdir -p $WORK_DIR/sbatch_out $WORK_DIR/sbatch_scripts cat <<EOT > $SBATCH_DIR @@ -25,7 +25,7 @@ WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif rm -f $WORK_DIR/results/$LOG_DIR -apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --amp --logname='$LOG_DIR' +apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python scripts/benchmark.py --mode ${5} --gpus ${3} --dim ${1} --batch_size ${4} --amp --logname='$LOG_DIR' EOT diff --git a/PyTorch/Segmentation/nnUNet/submit_benchmark_sbatch.sh 
b/PyTorch/Segmentation/nnUNet/submit_benchmark_sbatch.sh index 2e5f89a951c9d423a43e84f820fe0a6168fbe35e..29a30d16dc739782c313aeba62c76eef1ae9544a 100644 --- a/PyTorch/Segmentation/nnUNet/submit_benchmark_sbatch.sh +++ b/PyTorch/Segmentation/nnUNet/submit_benchmark_sbatch.sh @@ -2,17 +2,18 @@ set -e WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet +benchmark_modes=("train" "predict") dim=2 for nodes in {1..1}; do for gpus in {1,8}; do for batch_size in 256; do - for iteration in {1..1}; do + for benchmark_mode in "${benchmark_modes[@]}"; do - echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration} + echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, benchmark_mode ${benchmark_mode} # For single node - bash $WORK_DIR/run_benchmark_single_node.sh ${dim} ${nodes} ${gpus} ${batch_size} ${benchmark_mode} sleep 1 done