Skip to content
Snippets Groups Projects
Unverified Commit c8c728b5 authored by Xuan Gu's avatar Xuan Gu Committed by GitHub
Browse files

Update benchmark_multi_node.sh

parent 85053246
No related merge requests found
#!/bin/bash
# Slurm batch script: runs scripts/benchmark.py in train mode inside the
# nvidia_nnu-net_for_pytorch.sif Singularity image via srun, once without
# --amp (log name tagged "tf32") and once with --amp (log name tagged "amp").
#
# NOTE(review): this text looks like a before/after diff rendered without
# +/- markers -- several directives are duplicated and both a
# positional-parameter variant and a hard-coded loop variant of the benchmark
# commands are present. Confirm which lines belong to the current revision
# before submitting this file as-is.
#
# Scheduler directives. --nodes is given twice (2, then 8) and --time is given
# twice (0-21:00:00, then 0-00:10:00).
# NOTE(review): presumably only one value of each is intended; the output file
# name and the nodes=2 assignment further down both say 2 nodes -- confirm.
#SBATCH -A nsc
#SBATCH --nodes=2
#SBATCH --nodes=8
#SBATCH --gres=gpu:8
#SBATCH --ntasks-per-node=8
#SBATCH --time=0-21:00:00
#SBATCH --reservation=nsc-testing
#SBATCH -o benchmark_nodes2.out
#SBATCH --time=0-00:10:00
# The next line has extra leading '#' characters, so it does not start with the
# directive prefix; presumably a deliberately disabled reservation -- confirm.
#####SBATCH --reservation=bt-xuan_2nodes
# All later paths (results/, ${PWD}/data, ${PWD}/results, the .sif image and
# scripts/benchmark.py) are relative to this directory.
# NOTE(review): the cd result is not checked; if it fails, the rm and srun
# commands below run from whatever directory the job started in.
cd /proj/nsc/users/xuan/ngc/DeepLearningExamples/PyTorch/Segmentation/nnUNet
# For singularity
# Positional-parameter variant, run without --amp (log name tagged "tf32").
# As used below: $1 = --dim, $2 = --nodes, $3 = --gpus, $4 = --batch_size,
# $5 = iteration number (appears only in the log file name).
# The previous result file of the same name is removed before the run.
# NOTE(review): the parameters are not validated; if they are unset the
# commands still run with empty values -- confirm this variant is still in use.
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json
srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_tf32_iteration${5}.json"
# Hard-coded sweep variant: dim=2, nodes=2, gpus=8, batch_size=128,
# iterations 1 through 100. Each iteration does a run without --amp followed by
# a run with --amp, removing the matching old result file before each run.
dim=2
nodes=2
for gpus in 8; do
for batch_size in 128; do
for iteration in {1..100}; do
# Run without --amp; the configuration is echoed first so it shows in the job output.
echo dim $dim, nodes $nodes, gpus $gpus, batch_size $batch_size, tf32, iteration $iteration
rm -f results/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_tf32_iteration${iteration}.json
srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus $gpus --dim $dim --batch_size $batch_size --nodes $nodes --logname="benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_tf32_iteration${iteration}.json"
# Same configuration again, this time with --amp passed to benchmark.py.
echo dim $dim, nodes $nodes, gpus $gpus, batch_size $batch_size, amp, iteration $iteration
rm -f results/benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_amp_iteration${iteration}.json
srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus $gpus --dim $dim --batch_size $batch_size --nodes $nodes --amp --logname="benchmark_dim${dim}_nodes${nodes}_gpus${gpus}_batchsize${batch_size}_amp_iteration${iteration}.json"
done
done
done
# Positional-parameter variant with --amp (same $1..$5 meaning as the
# positional run further up).
# NOTE(review): presumably from the other side of the diff than the loops
# above -- confirm whether it should remain.
rm -f results/benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json
srun singularity exec --nv -B ${PWD}/data:/data -B ${PWD}/results:/results nvidia_nnu-net_for_pytorch.sif python scripts/benchmark.py --mode train --gpus ${3} --dim ${1} --batch_size ${4} --nodes ${2} --amp --logname="benchmark_dim${1}_nodes${2}_gpus${3}_batchsize${4}_amp_iteration${5}.json"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment