diff --git a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/README.md b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/README.md index 907ee108fc434bafa267e3fc3c6804f0d9b38590..fcd785f17083071250b19c8d58406ca47e02872e 100644 --- a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/README.md +++ b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/README.md @@ -2,17 +2,19 @@ ### Setting paths ``` -MODEL_NAME=nnunet_for_pytorch -MODEL_VERSION=21.11.0 -MODEL_BASE=/proj/nsc_testing/xuan/containers/nvidia_pytorch_21.11-py3.sif +MODEL_NAME=maskrcnn_for_pytorch +MODEL_VERSION=latest +MODEL_BASE=/proj/nsc_testing/xuan/containers/nvidia_pytorch_21.12-py3.sif CONTAINER_DIR=/proj/nsc_testing/xuan/containers/${MODEL_NAME}_${MODEL_VERSION}.sif -DEF_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/${MODEL_NAME}_${MODEL_VERSION}.def -WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet +DEF_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/${MODEL_NAME}_${MODEL_VERSION}.def +WORK_DIR=/proj/nsc_testing/xuan/berzelius-benchmarks/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN + +mkdir -p $WORK_DIR/data $WORK_DIR/results ``` ### Building the container ``` -apptainer build $MODEL_BASE docker://nvcr.io/nvidia/pytorch:21.11-py3 +apptainer build $MODEL_BASE docker://nvcr.io/nvidia/pytorch:21.12-py3 apptainer build $CONTAINER_DIR $DEF_DIR ``` @@ -20,8 +22,10 @@ apptainer build $CONTAINER_DIR $DEF_DIR ### Downloading and preprocessing the data ``` -apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python download.py --task 01 -apptainer exec --nv -B ${WORK_DIR}/data:/data -B ${WORK_DIR}/results:/results --pwd /workspace/nnunet_pyt $CONTAINER_DIR python /workspace/nnunet_pyt/preprocess.py --task 01 --dim 2 +apptainer exec --nv -B ${WORK_DIR}/data:/data --pwd /data $CONTAINER_DIR bash -c "cp /workspace/object_detection/hashes.md5 /data/ && bash /workspace/object_detection/download_dataset.sh /data" + +apptainer exec --nv $CONTAINER_DIR bash -c "cp -a /workspace/object_detection/* ${WORK_DIR}/" +apptainer exec --nv -B ${WORK_DIR}/data:/datasets/data -B ${WORK_DIR}/results:/results --pwd ${WORK_DIR} $CONTAINER_DIR bash scripts/train_benchmark.sh fp16 1 True True ``` diff --git a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/build.txt b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/build.txt index b2b105102f28b165a7bf5b43e563100d1e53436a..f0502261684f418ebdfd791ca8c01c8832f46d6d 100644 --- a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/build.txt +++ b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/build.txt @@ -1,6 +1,6 @@ https://catalog.ngc.nvidia.com/orgs/nvidia/resources/nnunet_for_pytorch/quick-start-guide -apptainer build containers/nvidia_pytorch_21.11-py3.sif docker://nvcr.io/nvidia/pytorch:21.11-py3 +apptainer build containers/nvidia_pytorch_21.12-py3.sif docker://nvcr.io/nvidia/pytorch:21.12-py3 MODULE_NAME=nnunet_for_pytorch MODULE_VERSION=21.11.0 diff --git a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/maskrcnn_for_pytorch_latest.def b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/maskrcnn_for_pytorch_latest.def index 65af3a2211e0a75a67236df542e3ab40b40c2287..8466e44dda9291a5983af2e91198fa002be75812 100644 --- a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/maskrcnn_for_pytorch_latest.def +++ b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/maskrcnn_for_pytorch_latest.def @@ -1,30 +1,35 @@ Bootstrap: localimage -From: /proj/nsc_testing/xuan/containers/nvidia_pytorch_22.11-py3.sif +From: /proj/nsc_testing/xuan/containers/nvidia_pytorch_21.12-py3.sif %environment export PYTHONNOUSERSITE=True -export OMP_NUM_THREADS=2 +export OMP_NUM_THREADS=1 %post -VERSION=latest +apt-get update && apt-get install -y libgl1-mesa-dev -mkdir /workspace/nnunet_pyt -cd /workspace/nnunet_pyt +pip install --upgrade --no-cache-dir pip \ + && pip install --no-cache-dir \ + mlperf-compliance==0.0.10 \ + opencv-python==4.4.0.42 \ + git+https://github.com/NVIDIA/dllogger \ + yacs \ + dtrx + +mkdir /workspace/object_detection +cd /workspace/object_detection git clone https://github.com/NVIDIA/DeepLearningExamples.git -mv DeepLearningExamples/PyTorch/Segmentation/nnUNet/* ./ +mv DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/pytorch/* ./ +mv DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/download_dataset.sh ./ +mv DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/download_weights.sh ./ +mv DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/hashes.md5 ./ +mv DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/weights.md5 ./ + +pip install -e . +/opt/conda/bin/conda install -y numpy rm -rf DeepLearningExamples -pip install --disable-pip-version-check -r requirements.txt -pip install monai==1.0.0 --no-dependencies -pip install numpy --upgrade -pip install torchmetrics==0.11.4 - -curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" -unzip -qq awscliv2.zip -./aws/install -rm -rf awscliv2.zip aws - -cp utils/instance_norm.py /usr/local/lib/python3.8/dist-packages/apex/normalization + \ No newline at end of file diff --git a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/nnunet_for_pytorch_latest.def b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/nnunet_for_pytorch_latest.def index 65af3a2211e0a75a67236df542e3ab40b40c2287..d06cd6806a4cef405d06bf5dad50c299213fe8af 100644 --- a/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/nnunet_for_pytorch_latest.def +++ b/NVIDIA/DeepLearningExamples/PyTorch/Segmentation/nnUNet/nnunet_for_pytorch_latest.def @@ -9,8 +9,6 @@ export OMP_NUM_THREADS=2 %post -VERSION=latest - mkdir /workspace/nnunet_pyt cd /workspace/nnunet_pyt git clone https://github.com/NVIDIA/DeepLearningExamples.git