From 78a63ee3082be4f6a9e1a9a28f41330638b9ed50 Mon Sep 17 00:00:00 2001
From: Xuan Gu <xuan.gu@liu.se>
Date: Thu, 12 Oct 2023 22:18:35 +0000
Subject: [PATCH] Update 4 files

- /NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/build.txt
- /NVIDIA/DeepLearningExamples/PyTorch/README.md
- /MLPerf/training/image_segmentation/pytorch/build.txt
- /MLPerf/training/image_segmentation/pytorch/README.md
---
 .../image_segmentation/pytorch/README.md      | 46 +++++++++++++++++++
 .../image_segmentation/pytorch/build.txt      |  2 +-
 NVIDIA/DeepLearningExamples/PyTorch/README.md | 41 +++++++++++++++++
 .../PyTorch/Segmentation/nnUNet/build.txt     |  2 -
 4 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 MLPerf/training/image_segmentation/pytorch/README.md
 create mode 100644 NVIDIA/DeepLearningExamples/PyTorch/README.md

diff --git a/MLPerf/training/image_segmentation/pytorch/README.md b/MLPerf/training/image_segmentation/pytorch/README.md
new file mode 100644
index 0000000..01a0a91
--- /dev/null
+++ b/MLPerf/training/image_segmentation/pytorch/README.md
@@ -0,0 +1,46 @@
+
+### Setting paths
+
+The U-Net3D reference implementation from MLPerf is not versioned, so the paths below carry no model version.
+
+
+```
+MODEL_NAME=nnunet_for_pytorch
+MODEL_BASE=/proj/nsc_testing/xuan/containers/pytorch_1.7.1-cuda11.0-cudnn8-runtime.sif
+CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODEL_NAME}.sif
+DEF_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/MLPerf/training/image_segmentation/pytorch/${MODEL_NAME}.def
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/MLPerf/training/image_segmentation/pytorch
+```
+### Building the container
+
+```
+apptainer build $MODEL_BASE  docker://pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
+apptainer build $CONTAINER_DIR $DEF_DIR
+```
+
+
+### Downloading and preprocessing the data
+
+```
+cd $WORK_DIR
+git clone https://github.com/neheller/kits19
+apptainer exec $CONTAINER_DIR bash -c "cd kits19 && python3 -m starter_code.get_imaging"
+mv kits19/data/* raw-data/
+rm -rf kits19
+
+apptainer exec --nv -B ${WORK_DIR}/raw-data:/raw-data -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results $CONTAINER_DIR bash -c "cd /workspace/unet3d && python3 preprocess_dataset.py --data_dir /raw-data --results_dir /data"
+```
+
+
+
+### Running the benchmark
+
+```
+apptainer exec --nv -B ${WORK_DIR}/raw-data:/raw-data -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results $CONTAINER_DIR bash -c "cd /workspace/unet3d && bash run_and_time.sh 1"
+```
+
+### Running the benchmark using batch jobs
+
+```
+bash submit_benchmark_jobs.sh
+```
diff --git a/MLPerf/training/image_segmentation/pytorch/build.txt b/MLPerf/training/image_segmentation/pytorch/build.txt
index 59ad0a5..fd77634 100644
--- a/MLPerf/training/image_segmentation/pytorch/build.txt
+++ b/MLPerf/training/image_segmentation/pytorch/build.txt
@@ -1,5 +1,5 @@
 
-apptainer build containers/pytorch_1.7.1-cuda11.0-cudnn8-devel.sif  docker://pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel
+apptainer build containers/pytorch_1.7.1-cuda11.0-cudnn8-runtime.sif  docker://pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
 
 
 MODULE_NAME=U-Net3D
diff --git a/NVIDIA/DeepLearningExamples/PyTorch/README.md b/NVIDIA/DeepLearningExamples/PyTorch/README.md
new file mode 100644
index 0000000..4f5a277
--- /dev/null
+++ b/NVIDIA/DeepLearningExamples/PyTorch/README.md
@@ -0,0 +1,41 @@
+
+### Setting paths
+
+```
+MODEL_NAME=nnunet_for_pytorch
+MODEL_VERSION=21.11.0
+MODEL_BASE=/proj/nsc_testing/xuan/containers/nvidia_pytorch_21.11-py3.sif
+CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODEL_NAME}_${MODEL_VERSION}.sif
+DEF_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/${MODEL_NAME}_${MODEL_VERSION}.def
+WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet
+```
+### Building the container
+
+```
+apptainer build $MODEL_BASE  docker://nvcr.io/nvidia/pytorch:21.11-py3
+apptainer build $CONTAINER_DIR $DEF_DIR
+```
+
+
+### Downloading and preprocessing the data
+
+```
+apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python download.py --task 01
+apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR  python /workspace/nnunet_pyt/preprocess.py --task 01 --dim 2
+```
+
+
+
+### Running the benchmark
+
+```
+apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python scripts/benchmark.py --mode train --gpus 1 --dim 2 --batch_size 256 --amp
+apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python scripts/benchmark.py --mode predict --gpus 1 --dim 2 --batch_size 256 --amp
+```
+
+### Running the benchmark using batch jobs
+
+```
+bash submit_benchmark_jobs.sh
+```
+
diff --git a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/build.txt b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/build.txt
index 1a679da..2bcba44 100644
--- a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/build.txt
+++ b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/build.txt
@@ -2,8 +2,6 @@ https://catalog.ngc.nvidia.com/orgs/nvidia/resources/nnunet_for_pytorch/quick-st
 
 apptainer build containers/nvidia_pytorch_21.11-py3.sif  docker://nvcr.io/nvidia/pytorch:21.11-py3
 
-export APPTAINER_BINDPATH=
-
 MODULE_NAME=nnunet_for_pytorch
 MODULE_VERSION=21.11.0
 CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODULE_NAME}_${MODULE_VERSION}.sif
-- 
GitLab