Add MLCube implementation for 3D Unet #695

Open · wants to merge 5 commits into master
1 change: 1 addition & 0 deletions image_segmentation/pytorch/.dockerignore
@@ -0,0 +1 @@
mlcube/workspace/*
1 change: 1 addition & 0 deletions image_segmentation/pytorch/.gitignore
@@ -0,0 +1 @@
mlcube/workspace/*
12 changes: 7 additions & 5 deletions image_segmentation/pytorch/Dockerfile
@@ -2,15 +2,17 @@ ARG FROM_IMAGE_NAME=pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
 #ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:21.02-py3
 FROM ${FROM_IMAGE_NAME}
 
-ADD . /workspace/unet3d
-WORKDIR /workspace/unet3d
-
 RUN apt-get update && \
     apt-get upgrade -y && \
-    apt-get install -y git
-RUN apt-get install -y vim
+    apt-get install -y git vim wget unzip
 
 RUN pip install --upgrade pip
-RUN pip install --disable-pip-version-check -r requirements.txt
+COPY requirements.txt /
+RUN pip install --disable-pip-version-check -r /requirements.txt
+
+ADD . /workspace/unet3d
+RUN chmod +x /workspace/unet3d/*.sh
+WORKDIR /workspace/unet3d
 
 #RUN pip uninstall -y apex; pip uninstall -y apex; git clone --branch seryilmaz/fused_dropout_softmax https://github.com/seryilmaz/apex.git; cd apex; pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--xentropy" --global-option="--deprecated_fused_adam" --global-option="--deprecated_fused_lamb" --global-option="--fast_multihead_attn" .
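For debugging outside the MLCube runner, the image can also be built and smoke-tested by hand. A minimal sketch, assuming you run it from `image_segmentation/pytorch/` (the `build_context` that `mlcube.yaml` below points at) and reuse the tag from its `image:` field:

```shell
# Build the benchmark image directly from the directory containing this Dockerfile.
docker build -t mlcommons/3d_unet:0.0.1 -f Dockerfile .

# Optional smoke test; the GPU flags mirror gpu_args in mlcube.yaml.
docker run --rm --shm-size=1g --gpus=all mlcommons/3d_unet:0.0.1 nvidia-smi
```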
21 changes: 21 additions & 0 deletions image_segmentation/pytorch/download_data.sh
@@ -0,0 +1,21 @@
#!/bin/bash
set -e

: "${DATASET_PATH:=/}"

while [ "$1" != "" ]; do
    case $1 in
        --data_dir=*)
            DATASET_PATH="${1#*=}"
            ;;
    esac
    shift
done

git clone https://github.com/neheller/kits19
cd kits19
cp -r data/* "$DATASET_PATH"
rm -r data/
ln -s "$DATASET_PATH" data
pip install -r requirements.txt
python -m starter_code.get_imaging
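The script can also be invoked directly, outside MLCube, via the `--data_dir` flag it parses above. A minimal sketch; the host path is an illustrative assumption, not part of the PR:

```shell
# Clone KITS19 and place the raw imaging data under /data/kits19.
./download_data.sh --data_dir=/data/kits19
```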
17 changes: 17 additions & 0 deletions image_segmentation/pytorch/download_demo.sh
@@ -0,0 +1,17 @@
#!/bin/bash
set -e

: "${DATASET_PATH:=/}"

while [ "$1" != "" ]; do
    case $1 in
        --data_dir=*)
            DATASET_PATH="${1#*=}"
            ;;
    esac
    shift
done

wget https://mlcube.mlcommons-storage.org/minibenchmarks/3d_unet.zip
unzip -o 3d_unet.zip -d "$DATASET_PATH"
rm 3d_unet.zip
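The demo downloader takes the same flag for standalone use; again, the target path is an illustrative assumption:

```shell
# Fetch and unpack the tiny demo dataset.
./download_demo.sh --data_dir=/data/demo
```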
56 changes: 56 additions & 0 deletions image_segmentation/pytorch/mlcube/README.md
@@ -0,0 +1,56 @@
# MLCube for 3D Unet

See the MLCube™ GitHub [repository](https://github.com/mlcommons/mlcube) and [wiki](https://mlcommons.github.io/mlcube/).

## Project setup

Docker must be installed and running before you start.

```bash
# Create Python environment and install MLCube Docker runner
virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker
# Fetch the implementation from GitHub
git clone https://github.com/mlcommons/training && cd ./training/image_segmentation/pytorch/mlcube
```

Inside the mlcube directory, run the following command to list the implemented tasks.

```shell
mlcube describe
```

### MLCube tasks

Download dataset.

```shell
mlcube run --task=download_data -Pdocker.build_strategy=always
```

Process dataset.

```shell
mlcube run --task=process_data -Pdocker.build_strategy=always
```

Train 3D U-Net.

```shell
mlcube run --task=train -Pdocker.build_strategy=always
```

### Execute the complete pipeline

You can execute the complete pipeline with a single command.

```shell
mlcube run --task=download_data,process_data,train -Pdocker.build_strategy=always
```

## Run a quick demo

You can run a quick demo that first downloads a tiny dataset and then executes a short training workload.

```shell
mlcube run --task=download_demo,demo -Pdocker.build_strategy=always
```
50 changes: 50 additions & 0 deletions image_segmentation/pytorch/mlcube/mlcube.yaml
@@ -0,0 +1,50 @@
name: 3d_unet
description: Image Segmentation benchmark
authors:
  - { name: "MLCommons Best Practices Working Group" }

platform:
  accelerator_count: 1

docker:
  # Image name.
  image: mlcommons/3d_unet:0.0.1
  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
  build_context: "../"
  # Docker file name within docker build context, default is `Dockerfile`.
  build_file: "Dockerfile"
  # GPU arguments
  gpu_args: "--shm-size=1g --gpus=all"

tasks:
  download_data:
    entrypoint: ./download_data.sh -a
    parameters:
      outputs:
        data_dir: data/
  process_data:
    entrypoint: ./process_data.sh -a
    parameters:
      inputs:
        data_dir: data/
      outputs:
        processed_data: processed_data/
  train:
    entrypoint: ./run_mlcube.sh -a
    parameters:
      inputs:
        dataset_dir: processed_data/
      outputs:
        log_dir: logs/
  download_demo:
    entrypoint: ./download_demo.sh -a
    parameters:
      outputs:
        data_dir: demo_data/
  demo:
    entrypoint: ./run_demo.sh -a
    parameters:
      inputs:
        dataset_dir: demo_data/
      outputs:
        log_dir: demo_logs/
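For orientation: the MLCube runner resolves each task's `inputs`/`outputs` to directories under `mlcube/workspace/` and appends them to the entrypoint as `--<name>=<path>` flags, which is exactly what the shell scripts in this PR parse. If your mlcube-docker version supports inline parameter overrides, the workspace defaults can be redirected from the command line; a hedged sketch, where the host path is an assumption:

```shell
# Point the download_data task's data_dir output at a custom host directory
# (inline key=value overrides; availability depends on your MLCube runner version).
mlcube run --task=download_data data_dir=/mnt/kits19 -Pdocker.build_strategy=always
```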
2 changes: 1 addition & 1 deletion image_segmentation/pytorch/preprocess_dataset.py
@@ -58,7 +58,7 @@ def __init__(self, args):
     def preprocess_dataset(self):
         os.makedirs(self.results_dir, exist_ok=True)
         print(f"Preprocessing {self.data_dir}")
-        for case in sorted([f for f in os.listdir(self.data_dir) if "case" in f]):
+        for case in tqdm(sorted([f for f in os.listdir(self.data_dir) if "case" in f])):
             case_id = int(case.split("_")[1])
             if case_id in EXCLUDED_CASES or case_id >= MAX_ID:
                 print("Case {}. Skipped.".format(case_id))
20 changes: 20 additions & 0 deletions image_segmentation/pytorch/process_data.sh
@@ -0,0 +1,20 @@
#!/bin/bash
set -e

: "${DATA_DIR:=/}"
: "${PROCESSED_DATA:=/}"

while [ $# -gt 0 ]; do
    case "$1" in
        --data_dir=*)
            DATA_DIR="${1#*=}"
            ;;
        --processed_data=*)
            PROCESSED_DATA="${1#*=}"
            ;;
        *) ;;
    esac
    shift
done

python preprocess_dataset.py --data_dir "$DATA_DIR" --results_dir "$PROCESSED_DATA"
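Run directly (outside MLCube) with the two flags the script parses; both paths are illustrative assumptions:

```shell
# Convert raw KITS19 cases into the preprocessed training format.
./process_data.sh --data_dir=/data/kits19 --processed_data=/data/kits19_preprocessed
```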
76 changes: 76 additions & 0 deletions image_segmentation/pytorch/run_demo.sh
@@ -0,0 +1,76 @@
#!/bin/bash
set -e

# Runs a short demo training workload and reports the elapsed time.
# Usage: run_demo.sh --dataset_dir=<path> --log_dir=<path>
# The random seed is taken from the SEED environment variable (default 0).

: "${SEED:=0}"
: "${DATASET_DIR:=/data}"
: "${LOG_DIR:=/results}"

while [ $# -gt 0 ]; do
    case "$1" in
        --dataset_dir=*)
            DATASET_DIR="${1#*=}"
            ;;
        --log_dir=*)
            LOG_DIR="${1#*=}"
            ;;
        *) ;;
    esac
    shift
done

ln -s "$LOG_DIR" /results

MAX_EPOCHS=50
QUALITY_THRESHOLD="0.908"
START_EVAL_AT=50
EVALUATE_EVERY=1
LEARNING_RATE="0.8"
LR_WARMUP_EPOCHS=1
BATCH_SIZE=2
GRADIENT_ACCUMULATION_STEPS=1


if [ -d "${DATASET_DIR}" ]
then
    # start timing
    start=$(date +%s)
    start_fmt=$(date +%Y-%m-%d\ %r)
    echo "STARTING TIMING RUN AT $start_fmt"

    # CLEAR YOUR CACHE HERE
    python -c "
from mlperf_logging.mllog import constants
from runtime.logging import mllog_event
mllog_event(key=constants.CACHE_CLEAR, value=True)"

    python main.py --data_dir "${DATASET_DIR}" \
        --epochs ${MAX_EPOCHS} \
        --evaluate_every ${EVALUATE_EVERY} \
        --start_eval_at ${START_EVAL_AT} \
        --quality_threshold ${QUALITY_THRESHOLD} \
        --batch_size ${BATCH_SIZE} \
        --optimizer sgd \
        --ga_steps ${GRADIENT_ACCUMULATION_STEPS} \
        --learning_rate ${LEARNING_RATE} \
        --seed ${SEED} \
        --lr_warmup_epochs ${LR_WARMUP_EPOCHS}

    # end timing
    end=$(date +%s)
    end_fmt=$(date +%Y-%m-%d\ %r)
    echo "ENDING TIMING RUN AT $end_fmt"

    # report result
    result=$(( end - start ))
    result_name="image_segmentation"

    echo "RESULT,$result_name,$SEED,$result,$USER,$start_fmt"
else
    echo "Directory ${DATASET_DIR} does not exist"
fi
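For a standalone run, the two flags mirror the MLCube task parameters and the seed comes from the `SEED` environment variable; the paths below are illustrative assumptions. On success the script prints a `RESULT,image_segmentation,...` line with the elapsed seconds.

```shell
# Short demo training run (50 epochs, single evaluation at epoch 50).
SEED=1 ./run_demo.sh --dataset_dir=/data/demo --log_dir=/tmp/demo_logs
```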
76 changes: 76 additions & 0 deletions image_segmentation/pytorch/run_mlcube.sh
@@ -0,0 +1,76 @@
#!/bin/bash
set -e

# Runs the full benchmark and reports time to convergence.
# Usage: run_mlcube.sh --dataset_dir=<path> --log_dir=<path>
# The random seed is taken from the SEED environment variable (default 0).

: "${SEED:=0}"
: "${DATASET_DIR:=/data}"
: "${LOG_DIR:=/results}"

while [ $# -gt 0 ]; do
    case "$1" in
        --dataset_dir=*)
            DATASET_DIR="${1#*=}"
            ;;
        --log_dir=*)
            LOG_DIR="${1#*=}"
            ;;
        *) ;;
    esac
    shift
done

ln -s "$LOG_DIR" /results

MAX_EPOCHS=4000
QUALITY_THRESHOLD="0.908"
START_EVAL_AT=1000
EVALUATE_EVERY=20
LEARNING_RATE="0.8"
LR_WARMUP_EPOCHS=200
BATCH_SIZE=2
GRADIENT_ACCUMULATION_STEPS=1


if [ -d "${DATASET_DIR}" ]
then
    # start timing
    start=$(date +%s)
    start_fmt=$(date +%Y-%m-%d\ %r)
    echo "STARTING TIMING RUN AT $start_fmt"

    # CLEAR YOUR CACHE HERE
    python -c "
from mlperf_logging.mllog import constants
from runtime.logging import mllog_event
mllog_event(key=constants.CACHE_CLEAR, value=True)"

    python main.py --data_dir "${DATASET_DIR}" \
        --epochs ${MAX_EPOCHS} \
        --evaluate_every ${EVALUATE_EVERY} \
        --start_eval_at ${START_EVAL_AT} \
        --quality_threshold ${QUALITY_THRESHOLD} \
        --batch_size ${BATCH_SIZE} \
        --optimizer sgd \
        --ga_steps ${GRADIENT_ACCUMULATION_STEPS} \
        --learning_rate ${LEARNING_RATE} \
        --seed ${SEED} \
        --lr_warmup_epochs ${LR_WARMUP_EPOCHS}

    # end timing
    end=$(date +%s)
    end_fmt=$(date +%Y-%m-%d\ %r)
    echo "ENDING TIMING RUN AT $end_fmt"

    # report result
    result=$(( end - start ))
    result_name="image_segmentation"

    echo "RESULT,$result_name,$SEED,$result,$USER,$start_fmt"
else
    echo "Directory ${DATASET_DIR} does not exist"
fi
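The full benchmark script is invoked the same way and trains until the 0.908 quality threshold (or 4000 epochs); the paths below are illustrative assumptions:

```shell
# Full benchmark run; logs land in the directory bound to log_dir.
SEED=1 ./run_mlcube.sh --dataset_dir=/data/kits19_preprocessed --log_dir=/tmp/unet3d_logs
```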