Skip to content
Snippets Groups Projects
Commit 02870d42 authored by Xuan Gu's avatar Xuan Gu
Browse files

Update 2 files

- /scripts/generate_single_node_job.sh
- /scripts/generate_multi_node_job.sh
parent 05c354a8
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
cat <<EOT > scripts/benchmark_single_node.sbatch
#!/bin/bash
#SBATCH -A nsc
#SBATCH --nodes=${2}
#SBATCH --gres=gpu:8
#SBATCH --ntasks-per-node=8
#SBATCH --time=0-00:10:00
#SBATCH -o sbatch_out/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_iteration${5}.out
#SBATCH --reservation=devel
# For apptainer
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
# srun apptainer exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
#srun apptainer exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'"
############## Running srun enroot ... stopped working at 20230220
# For enroot
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
#srun enroot start --rw --mount ${PWD}/data:/data --mount ${PWD}/results:/results nnunet bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
#srun enroot start --rw --mount ${PWD}/data:/data --mount ${PWD}/results:/results nnunet bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'"
############## Running srun with pyxis works
# For enroot
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
srun --container-image=/proj/nsc_testing/xuan/enroot/xuagu37+nvidia_nnu-net_for_pytorch+21.11.0.sqsh --container-name=nnunet --container-mounts=${PWD}/data:/data,${PWD}/results:/results --container-writable bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json'"
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
srun --container-image=/proj/nsc_testing/xuan/enroot/xuagu37+nvidia_nnu-net_for_pytorch+21.11.0.sqsh --container-name=nnunet --container-mounts=${PWD}/data:/data,${PWD}/results:/results --container-writable bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname='benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json'"
EOT
\ No newline at end of file
#!/bin/bash
cat <<EOT > benchmark_single_node.sh
cat <<EOT > scripts/benchmark_single_node.sbatch
#!/bin/bash
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment