diff --git a/PyTorch/Segmentation/nnUNet/generate_benchmark_jobs.sh b/PyTorch/Segmentation/nnUNet/generate_benchmark_jobs.sh index 159d4bcf73041311107ebc75c9272c3e77208f3a..d9db6da5eca1208ccf690a4db435c281ead9e952 100644 --- a/PyTorch/Segmentation/nnUNet/generate_benchmark_jobs.sh +++ b/PyTorch/Segmentation/nnUNet/generate_benchmark_jobs.sh @@ -4,9 +4,9 @@ MODULE_NAME=nnunet_for_pytorch MODULE_VERSION=21.11.0 WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif -SBATCH_DIR=$WORK_DIR/sbatch_scripts/benchmark_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}_fat.sbatch -SBATCH_OUT_DIR=$WORK_DIR/sbatch_out/benchmark_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}_fat.out -LOG_DIR=benchmark_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_fat.json +SBATCH_DIR=$WORK_DIR/sbatch_scripts/benchmark_${6}_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}.sbatch +SBATCH_OUT_DIR=$WORK_DIR/sbatch_out/benchmark_${6}_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}.out +LOG_DIR=benchmark_${6}_${5}_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp.json mkdir -p $WORK_DIR/sbatch_out $WORK_DIR/sbatch_scripts cat <<EOT > $SBATCH_DIR @@ -17,10 +17,23 @@ cat <<EOT > $SBATCH_DIR #SBATCH --gpus=${3} #SBATCH --time=0-0:10:00 #SBATCH --output=$SBATCH_OUT_DIR -##SBATCH --reservation=devel -#SBATCH -C "fat" + +EOT + +if [ ${6} == "thin" ]; then + cat <<EOT >> $SBATCH_DIR + #SBATCH --reservation=devel + EOT + +else + cat <<EOT >> $SBATCH_DIR + #SBATCH -C "fat" + EOT +fi + +cat <<EOT >> $SBATCH_DIR rm -f $WORK_DIR/results/$LOG_DIR apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python scripts/benchmark.py --mode ${5} --gpus ${3} --dim ${1} --batch_size ${4} --amp --logname='$LOG_DIR' -EOT \ No newline at end of file +EOT diff --git a/PyTorch/Segmentation/nnUNet/submit_benchmark_jobs.sh b/PyTorch/Segmentation/nnUNet/submit_benchmark_jobs.sh index 52874a62bf6acaf26698311ac0556cb3468fce83..4ea31f2dbebf3ee094ed73157dfb633d8cf3e715 100644 --- a/PyTorch/Segmentation/nnUNet/submit_benchmark_jobs.sh +++ b/PyTorch/Segmentation/nnUNet/submit_benchmark_jobs.sh @@ -3,20 +3,23 @@ set -e WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/PyTorch/Segmentation/nnUNet benchmark_modes=("train" "predict") +node_types=("thin" "fat") dim=2 for nodes in {1..1}; do for gpus in {1,8}; do for batch_size in 256; do for benchmark_mode in "${benchmark_modes[@]}"; do + for node_type in "${node_types[@]}"; do + - echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, benchmark_mode ${benchmark_mode} + echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, benchmark_mode ${benchmark_mode}, node_type ${node_type} - # For single node - bash $WORK_DIR/generate_benchmark_jobs.sh ${dim} ${nodes} ${gpus} ${batch_size} ${benchmark_mode} - SBATCH_DIR=$WORK_DIR/sbatch_scripts/benchmark_${benchmark_mode}_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}.sbatch - sbatch $SBATCH_DIR - sleep 1 + # For single node + bash $WORK_DIR/generate_benchmark_jobs.sh ${dim} ${nodes} ${gpus} ${batch_size} ${benchmark_mode} + SBATCH_DIR=$WORK_DIR/sbatch_scripts/benchmark_${node_type}_${benchmark_mode}_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}.sbatch + #sbatch $SBATCH_DIR + sleep 1 done done