Skip to content
Snippets Groups Projects
Commit d7617681 authored by Xuan Gu's avatar Xuan Gu
Browse files

Update 3 files

- /scripts/benchmark_multi_node.sbatch
- /scripts/benchmark_sbatch_submit.sh
- /README.md
parent afe017e0
No related branches found
No related tags found
No related merge requests found
# Benchmark of nnU-Net for PyTorch on Berzelius
# Berzelius nnU-Net Benchmark
The benchmarking is based on [Nvidia NGC nnU-net for Pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/resources/nnunet_for_pytorch) v21.11.0.
......@@ -27,8 +26,9 @@ docker push xuagu37/nvidia_nnu-net_for_pytorch:21.11.0
- Create directories
```
mkdir -p /proj/nsc_testing/xuan/nnUnet_benchmark
cd /proj/nsc_testing/xuan/nnUnet_benchmark
cd /proj/nsc_testing/xuan
git clone https://gitlab.liu.se/xuagu37/Berzelius-nnU-Net-Benchmark.git
cd Berzelius-nnU-Net-Benchmark
mkdir data results
```
<!-- - Clone the repository
......@@ -77,14 +77,13 @@ Exit the image.
- For benchmarking purposes, we use 1000 copies of a single image
```
bash copy_data_for_benchmark.sh
bash scripts/copy_data_for_benchmark.sh
```
- Run the script.
You need to modify the script for your setup, e.g. the name of your reservation, the number of nodes, batch_size, etc. Also, choose either singularity or enroot.
```
cd /proj/nsc/xuan/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet
mkdir sbatch_out
bash benchmark_sbatch_submit.sh
bash scripts/benchmark_sbatch_submit.sh
```
### Results
......
......@@ -14,7 +14,7 @@ srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidi
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
#srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json""
############## Running srun enroot ... stopped working at 20220220
############## Running srun enroot ... stopped working at 20230220
# For enroot
#rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
#srun enroot start --rw --mount ${PWD}/data:/data --mount ${PWD}/results:/results nnunet bash -c "cd /workspace/nnunet_pyt && python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json""
......
cd /proj/nsc/users/xuan/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet
dim=2
nodes=1
for gpus in {1..8}; do
for batch_size in 128; do
for iteration in {1..100}; do
for gpus in 8; do
echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}
# For single node
sbatch -o sbatch_out/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_iteration${iteration}.out benchmark_single_node.sbatch ${dim} ${nodes} ${gpus} ${batch_size} ${iteration}
for batch_size in 256; do
sleep 1 # pause to be kind to the scheduler
for iteration in {1..10}; do
echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}
done
done
done
for nodes in {2..8}
for gpus in {8}; do
for batch_size in 128; do
for iteration in {1..100}; do
echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}
# For single node
sbatch -o sbatch_out/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_iteration${iteration}.out benchmark_single_node.sbatch ${dim} ${nodes} ${gpus} ${batch_size} ${iteration}
......@@ -20,7 +32,6 @@ for gpus in 8; do
sleep 1 # pause to be kind to the scheduler
done
done
done
done
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment