diff --git a/.devcontainer/onCreateCommand.sh b/.devcontainer/onCreateCommand.sh index 2d8463aef..f923f6f36 100755 --- a/.devcontainer/onCreateCommand.sh +++ b/.devcontainer/onCreateCommand.sh @@ -6,4 +6,4 @@ pip install wheel pip install openvino-dev==2023.0.1 # [OPTIONAL] to generate optimized models for inference pip install mlcube_docker # [OPTIONAL] to deploy GaNDLF models as MLCube-compliant Docker containers pip install medmnist==2.1.0 -pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu +pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh index 384db4547..f6914c4a2 100755 --- a/.devcontainer/postCreateCommand.sh +++ b/.devcontainer/postCreateCommand.sh @@ -6,9 +6,9 @@ # if runnning on a GPU machine, install the GPU version of pytorch if command -v nvidia-smi &> /dev/null then - pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 + pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118 fi pip install -e . -python ./gandlf_verifyInstall +gandlf verify-install gzip -dk -r tutorials/classification_medmnist_notebook/medmnist/dataset diff --git a/.github/ISSUE_TEMPLATE/---bug-report.md b/.github/ISSUE_TEMPLATE/---bug-report.md index e6e2f0e90..6b05dae19 100644 --- a/.github/ISSUE_TEMPLATE/---bug-report.md +++ b/.github/ISSUE_TEMPLATE/---bug-report.md @@ -25,7 +25,7 @@ If applicable, add images, screenshots or other relevant media to help explain y ### Environment information GaNDLF version, OS, and any other relevant information. diff --git a/.github/ISSUE_TEMPLATE/--questions-help-support.md b/.github/ISSUE_TEMPLATE/--questions-help-support.md index 772f48598..5586e0efd 100644 --- a/.github/ISSUE_TEMPLATE/--questions-help-support.md +++ b/.github/ISSUE_TEMPLATE/--questions-help-support.md @@ -13,7 +13,7 @@ https://mlcommons.github.io/GaNDLF/faq ### Environment information GaNDLF version, OS, and any other relevant information. 
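Note: the devcontainer scripts above pin the new torch 2.2.1 wheels and pick the wheel index from an `nvidia-smi` check. A minimal Python sketch of that selection logic follows; the helper name `install_torch` and its default argument are hypothetical and not part of this patch.

# Hypothetical helper mirroring postCreateCommand.sh: choose the PyTorch wheel
# index based on whether nvidia-smi is visible, then install the pinned wheels.
import shutil
import subprocess
import sys

def install_torch(cuda_index: str = "https://download.pytorch.org/whl/cu118") -> None:
    index = cuda_index if shutil.which("nvidia-smi") else "https://download.pytorch.org/whl/cpu"
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install",
         "torch==2.2.1", "torchvision==0.17.1", "torchaudio==2.2.1",
         "--index-url", index]
    )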
diff --git a/.github/workflows/codacy.yml b/.github/workflows/codacy.yml index efc945c85..de9c6eba7 100644 --- a/.github/workflows/codacy.yml +++ b/.github/workflows/codacy.yml @@ -16,9 +16,7 @@ name: Codacy Security Scan on: push: branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] + pull_request: {} permissions: contents: read diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 65a7ceb99..4a8c4338f 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -14,9 +14,7 @@ name: "CodeQL" on: push: branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] + pull_request: {} schedule: - cron: '25 5 * * 2' diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index bf11ee661..730acbff5 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -3,8 +3,7 @@ name: Dev-Container CI on: push: branches: [ master ] - pull_request: - branches: [ master ] + pull_request: {} jobs: dev-container-test: diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index ed496073c..502467131 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -4,8 +4,7 @@ on: push: branches: [ master ] tags: [ '*.*.*' ] - pull_request: - branches: [ master ] + pull_request: {} workflow_dispatch: inputs: versionTag: diff --git a/.github/workflows/mlcube-test.yml b/.github/workflows/mlcube-test.yml index 69a94fa31..ed38bf647 100644 --- a/.github/workflows/mlcube-test.yml +++ b/.github/workflows/mlcube-test.yml @@ -1,39 +1,36 @@ -# This workflow will test gandlf_deploy for model and metrics MLCubes +# This workflow will test gandlf deploy for model and metrics MLCubes name: MLCube-Test on: push: - branches: [ master ] - pull_request: - branches: [ master ] - + branches: [master] + pull_request: {} jobs: test-deploy: - runs-on: ubuntu-latest steps: - - name: Free space - run: | - df -h - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo rm -rf "$ANDROID_SDK_ROOT" - df -h - - name: Checkout - uses: actions/checkout@v3 - - # Use changed-files-specific action to collect file changes. - # The following commented condition applied to a step will run that step only if non-docs files have changed. - # It should be applied to all functionality-related steps. - # if: steps.changed-files-specific.outputs.only_modified == 'false' - - name: Detect and screen file changes - id: changed-files-specific - uses: tj-actions/changed-files@v41 - with: + - name: Free space + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf "$ANDROID_SDK_ROOT" + df -h + - name: Checkout + uses: actions/checkout@v3 + + # Use changed-files-specific action to collect file changes. + # The following commented condition applied to a step will run that step only if non-docs files have changed. + # It should be applied to all functionality-related steps. 
+ # if: steps.changed-files-specific.outputs.only_modified == 'false' + - name: Detect and screen file changes + id: changed-files-specific + uses: tj-actions/changed-files@v41 + with: files: | .github/*.md .github/ISSUE_TEMPLATE/*.md @@ -46,37 +43,37 @@ jobs: LICENSE Dockerfile-* - - name: Summarize docs and non-docs modifications - run: | - echo "List of docs files that have changed: ${{ steps.changed-files-specific.outputs.all_modified_files }}" - echo "Changed non-docs files: ${{ steps.changed-files-specific.outputs.other_modified_files }}" - - # This second step is unnecessary but highly recommended because - # It will cache database and saves time re-downloading it if database isn't stale. - - name: Cache pip - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Set up Python 3.9 - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install dependencies and package - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - sudo apt-get update - sudo apt-get install libvips libvips-tools -y - python -m pip install --upgrade pip==24.0 - python -m pip install wheel - python -m pip install openvino-dev==2023.0.1 mlcube_docker - pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu - pip install -e . - - name: Run mlcube deploy tests - working-directory: ./testing - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - sh test_deploy.sh + - name: Summarize docs and non-docs modifications + run: | + echo "List of docs files that have changed: ${{ steps.changed-files-specific.outputs.all_modified_files }}" + echo "Changed non-docs files: ${{ steps.changed-files-specific.outputs.other_modified_files }}" + + # This second step is unnecessary but highly recommended because + # It will cache database and saves time re-downloading it if database isn't stale. + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Set up Python 3.9 + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies and package + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + sudo apt-get update + sudo apt-get install libvips libvips-tools -y + python -m pip install --upgrade pip==24.0 + python -m pip install wheel + python -m pip install openvino-dev==2023.0.1 mlcube_docker + pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu + pip install -e . 
+ - name: Run mlcube deploy tests + working-directory: ./testing + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + sh test_deploy.sh diff --git a/.github/workflows/openfl-test.yml b/.github/workflows/openfl-test.yml index 727a13de0..cf78b3b06 100644 --- a/.github/workflows/openfl-test.yml +++ b/.github/workflows/openfl-test.yml @@ -5,36 +5,33 @@ name: OpenFL-Test on: push: - branches: [ master ] - pull_request: - branches: [ master ] - + branches: [master] + pull_request: {} jobs: openfl-test: - runs-on: ubuntu-latest steps: - - name: Free space - run: | - df -h - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo rm -rf "$ANDROID_SDK_ROOT" - df -h - - name: Checkout - uses: actions/checkout@v3 - - # Use changed-files-specific action to collect file changes. - # The following commented condition applied to a step will run that step only if non-docs files have changed. - # It should be applied to all functionality-related steps. - # if: steps.changed-files-specific.outputs.only_modified == 'false' - - name: Detect and screen file changes - id: changed-files-specific - uses: tj-actions/changed-files@v41 - with: + - name: Free space + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf "$ANDROID_SDK_ROOT" + df -h + - name: Checkout + uses: actions/checkout@v3 + + # Use changed-files-specific action to collect file changes. + # The following commented condition applied to a step will run that step only if non-docs files have changed. + # It should be applied to all functionality-related steps. + # if: steps.changed-files-specific.outputs.only_modified == 'false' + - name: Detect and screen file changes + id: changed-files-specific + uses: tj-actions/changed-files@v41 + with: files: | .github/*.md .github/ISSUE_TEMPLATE/*.md @@ -47,59 +44,60 @@ jobs: LICENSE Dockerfile-* - - name: Summarize docs and non-docs modifications - run: | - echo "List of docs files that have changed: ${{ steps.changed-files-specific.outputs.all_modified_files }}" - echo "Changed non-docs files: ${{ steps.changed-files-specific.outputs.other_modified_files }}" - - # This second step is unnecessary but highly recommended because - # It will cache database and saves time re-downloading it if database isn't stale. - - name: Cache pip - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Set up Python 3.9 - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install dependencies and package - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - sudo apt-get update - sudo apt-get install libvips libvips-tools -y - python -m pip install --upgrade pip==24.0 - python -m pip install wheel - pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu - pip install -e . - - name: Run generic unit tests to download data and construct CSVs - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. 
--cov-report=xml -k "prepare_data_for_ci" - # openfl tests start here - - name: Run OpenFL tests - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - echo "Removing onnx because of protobuf version conflict" - pip uninstall onnx -y - echo "Installing OpenFL" - git clone --depth=1 https://github.com/securefederatedai/openfl.git - cd openfl - git fetch --tags - # echo "Checkout the latest OpenFL tag" - # latestTag=$(git describe --tags "$(git rev-list --tags --max-count=1)") - # git checkout $latestTag - # sed -i -e 's/protobuf==3.19.6/protobuf/g' setup.py ## this should NOT be there - pip install -e . - cd .. - echo "Copying files to appropriate directories and updated headers" - head -n 1 testing/data/train_2d_rad_segmentation.csv > /home/runner/work/GaNDLF/GaNDLF/openfl/valid.csv - tail -n +9 testing/data/train_2d_rad_segmentation.csv >> /home/runner/work/GaNDLF/GaNDLF/openfl/valid.csv - head -n 8 testing/data/train_2d_rad_segmentation.csv > /home/runner/work/GaNDLF/GaNDLF/openfl/train.csv - sed -i 's/# n_channels: 3/num_channels: 3/g' testing/config_segmentation.yaml - config_to_use=$(pwd)/testing/config_segmentation.yaml - cd openfl - python -m tests.github.test_gandlf --template gandlf_seg_test --fed_workspace aggregator --col1 one --col2 two --rounds-to-train 1 --gandlf_config $config_to_use + - name: Summarize docs and non-docs modifications + run: | + echo "List of docs files that have changed: ${{ steps.changed-files-specific.outputs.all_modified_files }}" + echo "Changed non-docs files: ${{ steps.changed-files-specific.outputs.other_modified_files }}" + + # This second step is unnecessary but highly recommended because + # It will cache database and saves time re-downloading it if database isn't stale. + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Set up Python 3.9 + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies and package + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + sudo apt-get update + sudo apt-get install libvips libvips-tools -y + python -m pip install --upgrade pip==24.0 + python -m pip install wheel + pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu + pip install -e . + - name: Run generic unit tests to download data and construct CSVs + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml -k "prepare_data_for_ci" + # openfl tests start here + - name: Run OpenFL tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + echo "Removing onnx because of protobuf version conflict" + pip uninstall onnx -y + echo "Installing OpenFL" + git clone --depth=1 https://github.com/securefederatedai/openfl.git + cd openfl + git fetch --tags + # echo "Checkout the latest OpenFL tag" + # latestTag=$(git describe --tags "$(git rev-list --tags --max-count=1)") + # git checkout $latestTag + # sed -i -e 's/protobuf==3.19.6/protobuf/g' setup.py ## this should NOT be there + pip install -e . + cd .. 
+ echo "Copying files to appropriate directories and updated headers" + head -n 1 testing/data/train_2d_rad_segmentation.csv > /home/runner/work/GaNDLF/GaNDLF/openfl/valid.csv + tail -n +9 testing/data/train_2d_rad_segmentation.csv >> /home/runner/work/GaNDLF/GaNDLF/openfl/valid.csv + head -n 8 testing/data/train_2d_rad_segmentation.csv > /home/runner/work/GaNDLF/GaNDLF/openfl/train.csv + sed -i 's/# n_channels: 3/num_channels: 3/g' testing/config_segmentation.yaml + config_to_use=$(pwd)/testing/config_segmentation.yaml + cd openfl + python -m tests.github.test_gandlf --template gandlf_seg_test --fed_workspace aggregator --col1 one --col2 two --rounds-to-train 1 --gandlf_config $config_to_use + diff --git a/.github/workflows/ossar-analysis.yml b/.github/workflows/ossar-analysis.yml index ffa79491c..b0323a7b2 100644 --- a/.github/workflows/ossar-analysis.yml +++ b/.github/workflows/ossar-analysis.yml @@ -6,9 +6,7 @@ name: OSSAR on: push: branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] + pull_request: {} schedule: - cron: '32 20 * * 3' diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index c490f2ba4..c99262a9f 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -5,36 +5,33 @@ name: CI-PyTest on: push: - branches: [ master ] - pull_request: - branches: [ master ] - + branches: [master] + pull_request: {} jobs: full-test: - runs-on: ubuntu-latest steps: - - name: Free space - run: | - df -h - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo rm -rf "$ANDROID_SDK_ROOT" - df -h - - name: Checkout - uses: actions/checkout@v3 - - # Use changed-files-specific action to collect file changes. - # The following commented condition applied to a step will run that step only if non-docs files have changed. - # It should be applied to all functionality-related steps. - # if: steps.changed-files-specific.outputs.only_modified == 'false' - - name: Detect and screen file changes - id: changed-files-specific - uses: tj-actions/changed-files@v41 - with: + - name: Free space + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf "$ANDROID_SDK_ROOT" + df -h + - name: Checkout + uses: actions/checkout@v3 + + # Use changed-files-specific action to collect file changes. + # The following commented condition applied to a step will run that step only if non-docs files have changed. + # It should be applied to all functionality-related steps. + # if: steps.changed-files-specific.outputs.only_modified == 'false' + - name: Detect and screen file changes + id: changed-files-specific + uses: tj-actions/changed-files@v41 + with: files: | .github/*.md .github/ISSUE_TEMPLATE/*.md @@ -47,77 +44,72 @@ jobs: LICENSE Dockerfile-* - - name: Summarize docs and non-docs modifications - run: | - echo "List of docs files that have changed: ${{ steps.changed-files-specific.outputs.all_modified_files }}" - echo "Changed non-docs files: ${{ steps.changed-files-specific.outputs.other_modified_files }}" - - # This second step is unnecessary but highly recommended because - # It will cache database and saves time re-downloading it if database isn't stale. 
- - name: Cache pip - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Check dev version - run: | # Get current canonical version, append current date as an identifier - currentVer=$(grep -oP '__version__ = \K.*' ./GANDLF/version.py) - currentVer=${currentVer//\"} - echo "Current version is $currentVer" - if [[ $currentVer == *"dev"* ]]; then - echo "Nightly will be published" - echo "publish_nightly=true" >> $GITHUB_ENV - else - echo "Nightly will not be published" - echo "publish_nightly=false" >> $GITHUB_ENV - fi - - name: Set up Python 3.9 - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install dependencies and package - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - sudo apt-get update - sudo apt-get install libvips libvips-tools -y - python -m pip install --upgrade pip==24.0 - python -m pip install wheel - python -m pip install openvino-dev==2023.0.1 mlcube_docker - pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu - pip install -e . - - name: Run generic unit tests - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml -k "generic" - - name: Run classification unit tests with histology - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" - - name: Run classification unit tests - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml --cov-append -k "classification and not histology" - - name: Run segmentation unit tests - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml --cov-append -k "segmentation and not transunet" - - name: Run regression unit tests - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml --cov-append -k "regression" - - name: Run transunet unit tests - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml --cov-append -k "transunet" + - name: Summarize docs and non-docs modifications + run: | + echo "List of docs files that have changed: ${{ steps.changed-files-specific.outputs.all_modified_files }}" + echo "Changed non-docs files: ${{ steps.changed-files-specific.outputs.other_modified_files }}" + # This second step is unnecessary but highly recommended because + # It will cache database and saves time re-downloading it if database isn't stale. 
+ - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Set up Python 3.9 + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies and package + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + sudo apt-get update + sudo apt-get install libvips libvips-tools -y + python -m pip install --upgrade pip==24.0 + python -m pip install wheel + python -m pip install openvino-dev==2023.0.1 mlcube_docker + pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu + pip install -e . + - name: Run generic unit tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml -k "generic" + - name: Run classification unit tests with histology + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" + - name: Run classification unit tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "classification and not histology" + - name: Run segmentation unit tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "segmentation and not transunet" + - name: Run regression unit tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "regression" + - name: Run transunet unit tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "transunet" + - name: Run entrypoints tests + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "entrypoints" + - name: Run test for update_version + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + run: | + pytest --cov=. 
--cov-report=xml --cov-append -k "update_version" - - name: Upload coverage - if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change - uses: codecov/codecov-action@v1 - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml - flags: unittests + - name: Upload coverage + if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./coverage.xml + flags: unittests \ No newline at end of file diff --git a/.gitignore b/.gitignore index f88bc5703..d40b4a5df 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ tutorials/classification_medmnist_notebook/output_stats .jupyter_ystore.db tutorials/classification_medmnist_notebook/model tutorials/classification_medmnist_notebook/dataset/*.csv +testing/test_deploy +tmp diff --git a/Dockerfile-CPU b/Dockerfile-CPU index 1cb510679..0337e012a 100644 --- a/Dockerfile-CPU +++ b/Dockerfile-CPU @@ -9,7 +9,7 @@ RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1 RUN python3.9 -m pip install --upgrade pip==24.0 # EXPLICITLY install cpu versions of torch/torchvision (not all versions have +cpu modes on PyPI...) -RUN python3.9 -m pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu +RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu RUN python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker # Do some dependency installation separately here to make layer caching more efficient @@ -20,11 +20,11 @@ RUN python3.9 -c "from setup import requirements; file = open('requirements.txt' COPY . /GaNDLF WORKDIR /GaNDLF RUN python3.9 -m pip install -e . -# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf_run -# If a user calls "docker run gandlf:[tag] gandlf_anonymize", it will resolve to running "python gandlf_anonymize" instead. +# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf run +# If a user calls "docker run gandlf:[tag] anonymize", it will resolve to running "gandlf anonymize" instead. # CMD is inherently overridden by args to "docker run", entrypoint is constant. -ENTRYPOINT python3.9 -CMD gandlf_run +ENTRYPOINT gandlf +CMD run # The below force the container commands to run as a nonroot user with UID > 10000. # This greatly reduces UID collision probability between container and host, helping prevent privilege escalation attacks. 
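Note: the Dockerfile comment above describes the intended ENTRYPOINT/CMD interplay: `gandlf` stays constant while the default subcommand `run` is replaced by whatever arguments are passed to `docker run`. A small illustrative sketch of that resolution, assuming exec-style argument handling; the function below is for explanation only and does not exist in the image or repository.

# Illustrative only: model how a fixed entrypoint plus a default CMD resolve
# into the command executed inside the container.
from typing import List

def resolved_command(run_args: List[str]) -> List[str]:
    entrypoint = ["gandlf"]   # constant, set via ENTRYPOINT
    default_cmd = ["run"]     # default argument, set via CMD
    return entrypoint + (run_args or default_cmd)

assert resolved_command([]) == ["gandlf", "run"]
assert resolved_command(["anonymize", "--help"]) == ["gandlf", "anonymize", "--help"]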
diff --git a/Dockerfile-CUDA11.8 b/Dockerfile-CUDA11.8 index 9d5de251f..68eb1506b 100644 --- a/Dockerfile-CUDA11.8 +++ b/Dockerfile-CUDA11.8 @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y software-properties-common RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1 RUN python3.9 -m pip install --upgrade pip==24.0 -RUN python3.9 -m pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 +RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118 RUN python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker # Do some dependency installation separately here to make layer caching more efficient @@ -24,11 +24,11 @@ COPY . /GaNDLF WORKDIR /GaNDLF RUN python3.9 -m pip install -e . -# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf_run -# If a user calls "docker run gandlf:[tag] gandlf_anonymize", it will resolve to running "python gandlf_anonymize" instead. +# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf run +# If a user calls "docker run gandlf:[tag] anonymize", it will resolve to running "gandlf anonymize" instead. # CMD is inherently overridden by args to "docker run", entrypoint is constant. -ENTRYPOINT python3.9 -CMD gandlf_run +ENTRYPOINT gandlf +CMD run # The below force the container commands to run as a nonroot user with UID > 10000. # This greatly reduces UID collision probability between container and host, helping prevent privilege escalation attacks. diff --git a/Dockerfile-CUDA12.1 b/Dockerfile-CUDA12.1 index 8d4bc90f8..28a3287e7 100644 --- a/Dockerfile-CUDA12.1 +++ b/Dockerfile-CUDA12.1 @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y software-properties-common RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1 RUN python3.9 -m pip install --upgrade pip==24.0 -RUN python3.9 -m pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 +RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121 RUN python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker # Do some dependency installation separately here to make layer caching more efficient @@ -24,11 +24,11 @@ COPY . /GaNDLF WORKDIR /GaNDLF RUN python3.9 -m pip install -e . -# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf_run -# If a user calls "docker run gandlf:[tag] gandlf_anonymize", it will resolve to running "python gandlf_anonymize" instead. +# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf run +# If a user calls "docker run gandlf:[tag] anonymize", it will resolve to running "gandlf anonymize" instead. # CMD is inherently overridden by args to "docker run", entrypoint is constant. 
-ENTRYPOINT python3.9 -CMD gandlf_run +ENTRYPOINT gandlf +CMD run # The below force the container commands to run as a nonroot user with UID > 10000. # This greatly reduces UID collision probability between container and host, helping prevent privilege escalation attacks. diff --git a/Dockerfile-ROCm b/Dockerfile-ROCm index 8c81089fe..d45382289 100644 --- a/Dockerfile-ROCm +++ b/Dockerfile-ROCm @@ -1,4 +1,4 @@ -FROM rocm/pytorch:rocm5.6_ubuntu20.04_py3.8_pytorch_1.12.1 +FROM rocm/pytorch:rocm5.7_ubuntu20.04_py3.9_pytorch_2.0.1 LABEL github="https://github.com/mlcommons/GaNDLF" LABEL docs="https://mlcommons.github.io/GaNDLF/" LABEL version=1.0 @@ -10,8 +10,8 @@ RUN apt-get update && apt-get install -y software-properties-common RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1 RUN python3.9 -m pip install --upgrade pip==24.0 -RUN python3.9 -m pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/rocm5.6 -RUN python3.9 -m pip install --upgrade pip==24.0 && python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker +RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/rocm5.7 +RUN python3.9 -m pip install --upgrade pip && python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker RUN apt-get update && apt-get install -y libgl1 # Do some dependency installation separately here to make layer caching more efficient @@ -23,11 +23,11 @@ COPY . /GaNDLF WORKDIR /GaNDLF RUN python3.9 -m pip install -e . -# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf_run -# If a user calls "docker run gandlf:[tag] gandlf_anonymize", it will resolve to running "python gandlf_anonymize" instead. +# Entrypoint forces all commands given via "docker run" to go through python, CMD forces the default entrypoint script argument to be gandlf run +# If a user calls "docker run gandlf:[tag] anonymize", it will resolve to running "gandlf anonymize" instead. # CMD is inherently overridden by args to "docker run", entrypoint is constant. -ENTRYPOINT python3.9 -CMD gandlf_run +ENTRYPOINT gandlf +CMD run # The below force the container commands to run as a nonroot user with UID > 10000. 
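Note: taken together, the Dockerfile changes above install the same torch 2.2.1 / torchvision 0.17.1 / torchaudio 2.2.1 pin from a different wheel index per image; the ROCm image also moves its base to rocm/pytorch:rocm5.7_ubuntu20.04_py3.9_pytorch_2.0.1. The mapping below is a summary derived from the Dockerfiles, not code that exists in the repository.

# Wheel index used by each image after this change (summary only).
TORCH_WHEEL_INDEX = {
    "Dockerfile-CPU": "https://download.pytorch.org/whl/cpu",
    "Dockerfile-CUDA11.8": "https://download.pytorch.org/whl/cu118",
    "Dockerfile-CUDA12.1": "https://download.pytorch.org/whl/cu121",
    "Dockerfile-ROCm": "https://download.pytorch.org/whl/rocm5.7",
}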
diff --git a/GANDLF/cli/deploy.py b/GANDLF/cli/deploy.py index fd96bd197..8beca9069 100644 --- a/GANDLF/cli/deploy.py +++ b/GANDLF/cli/deploy.py @@ -149,13 +149,7 @@ def deploy_docker_mlcube( if os.path.isfile(os.path.join(gandlf_root, item)) and item.startswith("Dockerfile-") ] - entrypoints = [ - item - for item in os.listdir(gandlf_root) - if os.path.isfile(os.path.join(gandlf_root, item)) - and item.startswith("gandlf_") - ] - for file in setup_files + dockerfiles + entrypoints: + for file in setup_files + dockerfiles: shutil.copy( os.path.join(gandlf_root, file), os.path.join(gandlf_root, "GANDLF", file), diff --git a/GANDLF/cli/generate_metrics.py b/GANDLF/cli/generate_metrics.py index f685bf1fc..d484b63a0 100644 --- a/GANDLF/cli/generate_metrics.py +++ b/GANDLF/cli/generate_metrics.py @@ -1,5 +1,5 @@ import sys -import yaml +import json from typing import Optional from pprint import pprint import pandas as pd @@ -31,8 +31,41 @@ ) +def __update_header_location_case_insensitive( + input_df: pd.DataFrame, expected_column_name: str, required: bool = True +) -> pd.DataFrame: + """ + This function checks for a column in the dataframe in a case-insensitive manner and renames it. + + Args: + input_df (pd.DataFrame): The input dataframe. + expected_column_name (str): The expected column name. + required (bool, optional): Whether the column is required. Defaults to True. + + Returns: + pd.DataFrame: The updated dataframe. + """ + actual_column_name = None + for col in input_df.columns: + if col.lower() == expected_column_name.lower(): + actual_column_name = col + break + + if required: + assert ( + actual_column_name is not None + ), f"Column {expected_column_name} not found in the dataframe" + + return input_df.rename(columns={actual_column_name: expected_column_name}) + else: + return input_df + + def generate_metrics_dict( - input_csv: str, config: str, outputfile: Optional[str] = None + input_csv: str, + config: str, + outputfile: Optional[str] = None, + missing_prediction: int = -1, ) -> dict: """ This function generates metrics from the input csv and the config. @@ -41,27 +74,83 @@ def generate_metrics_dict( input_csv (str): The input CSV. config (str): The input yaml config. outputfile (str, optional): The output file to save the metrics. Defaults to None. + missing_prediction (int, optional): The value to use for missing predictions as penalty. Default is -1. Returns: dict: The metrics dictionary. 
""" - input_df = pd.read_csv(input_csv) + # the case where the input is a comma-separated 2 files with targets and predictions + if "," in input_csv: + target_csv, prediction_csv = input_csv.split(",") + target_df = pd.read_csv(target_csv) + prediction_df = pd.read_csv(prediction_csv) + ## start sanity checks + # if missing predictions are not to be penalized, check if the number of rows in the target and prediction files are the same + if missing_prediction == -1: + assert ( + target_df.shape[0] == prediction_df.shape[0] + ), "The number of rows in the target and prediction files should be the same" + + # check if the number of columns in the target and prediction files are the same + assert ( + target_df.shape[1] == prediction_df.shape[1] + ), "The number of columns in the target and prediction files should be the same" + assert ( + target_df.shape[1] == 2 + ), "The target and prediction files should have *exactly* 2 columns" + + # find the correct header for the subjectID column + target_df = __update_header_location_case_insensitive(target_df, "SubjectID") + prediction_df = __update_header_location_case_insensitive( + prediction_df, "SubjectID" + ) + # check if prediction_df has extra subjectIDs + assert ( + prediction_df["SubjectID"].isin(target_df["SubjectID"]).all() + ), "The `SubjectID` column in the prediction file should be a subset of the `SubjectID` column in the target file" + + # individual checks for target and prediction dataframes + for df in [target_df, prediction_df]: + # check if the "subjectID" column has duplicates + assert ( + df["SubjectID"].duplicated().sum() == 0 + ), "The `SubjectID` column should not have duplicates" + + # check if SubjectID is the first column + assert ( + df.columns[0] == "SubjectID" + ), "The `SubjectID` column should be the first column in the target and prediction files" + + # change the column name after subjectID to target and prediction + target_df = target_df.rename(columns={target_df.columns[1]: "Target"}) + prediction_df = prediction_df.rename( + columns={prediction_df.columns[1]: "Prediction"} + ) + + # combine the two dataframes + input_df = target_df.merge(prediction_df, how="left", on="SubjectID").fillna( + missing_prediction + ) - # check required headers in a case insensitive manner - headers = {} - required_columns = ["subjectid", "prediction", "target"] - for col, _ in input_df.items(): - col_lower = col.lower() + else: + # the case where the input is a single file with targets and predictions + input_df = pd.read_csv(input_csv) + + # check required headers in a case insensitive manner and rename them + required_columns = ["SubjectID", "Prediction", "Target"] for column_to_check in required_columns: - if column_to_check == col_lower: - headers[column_to_check] = col - if col_lower == "mask": - headers["mask"] = col - for column in required_columns: - assert column in headers, f"The input csv should have a column named {column}" + input_df = __update_header_location_case_insensitive( + input_df, column_to_check + ) + + # check if the "subjectID" column has duplicates + assert ( + input_df["SubjectID"].duplicated().sum() == 0 + ), "The `SubjectID` column should not have duplicates" overall_stats_dict = {} parameters = ConfigManager(config) + # ensure that the problem_type is set problem_type = parameters.get("problem_type", None) problem_type = ( find_problem_type_from_parameters(parameters) @@ -70,12 +159,14 @@ def generate_metrics_dict( ) parameters["problem_type"] = problem_type - if problem_type == "regression" or 
problem_type == "classification": - parameters["model"]["num_classes"] = len(parameters["model"]["class_list"]) - predictions_tensor = torch.from_numpy( - input_df[headers["prediction"]].to_numpy().ravel() + if problem_type == "classification": + parameters["model"]["num_classes"] = parameters["model"].get( + "num_classes", len(parameters["model"]["class_list"]) ) - labels_tensor = torch.from_numpy(input_df[headers["target"]].to_numpy().ravel()) + + if problem_type == "regression" or problem_type == "classification": + predictions_tensor = torch.from_numpy(input_df["Prediction"].to_numpy().ravel()) + labels_tensor = torch.from_numpy(input_df["Target"].to_numpy().ravel()) overall_stats_dict = overall_stats( predictions_tensor, labels_tensor, parameters ) @@ -84,10 +175,10 @@ def generate_metrics_dict( # read images and then calculate metrics class_list = parameters["model"]["class_list"] for _, row in tqdm(input_df.iterrows(), total=input_df.shape[0]): - current_subject_id = row[headers["subjectid"]] + current_subject_id = row["SubjectID"] overall_stats_dict[current_subject_id] = {} - label_image = torchio.LabelMap(row[headers["target"]]) - pred_image = torchio.LabelMap(row[headers["prediction"]]) + label_image = torchio.LabelMap(row["Target"]) + pred_image = torchio.LabelMap(row["Prediction"]) label_tensor = label_image.data pred_tensor = pred_image.data spacing = label_image.spacing @@ -225,20 +316,17 @@ def __percentile_clip( ) # normalizes values to [0;1] return output_tensor + input_df = __update_header_location_case_insensitive(input_df, "Mask", False) for _, row in tqdm(input_df.iterrows(), total=input_df.shape[0]): - current_subject_id = row[headers["subjectid"]] + current_subject_id = row["SubjectID"] overall_stats_dict[current_subject_id] = {} - target_image = __fix_2d_tensor( - torchio.ScalarImage(row[headers["target"]]).data - ) - pred_image = __fix_2d_tensor( - torchio.ScalarImage(row[headers["prediction"]]).data - ) - # if "mask" is not in the row, we assume that the whole image is the mask + target_image = __fix_2d_tensor(torchio.ScalarImage(row["Target"]).data) + pred_image = __fix_2d_tensor(torchio.ScalarImage(row["Prediction"]).data) + # if "Mask" is not in the row, we assume that the whole image is the mask # always cast to byte tensor mask = ( - __fix_2d_tensor(torchio.LabelMap(row[headers["mask"]]).data) - if "mask" in row + __fix_2d_tensor(torchio.LabelMap(row["Mask"]).data) + if "Mask" in row else torch.from_numpy( np.ones(target_image.numpy().shape, dtype=np.uint8) ) @@ -339,5 +427,8 @@ def __percentile_clip( pprint(overall_stats_dict) if outputfile is not None: - with open(outputfile, "w") as outfile: - yaml.dump(overall_stats_dict, outfile) + ## todo: needs debugging since this writes the file handler in some cases, so replaced with json + # with open(outputfile, "w") as outfile: + # yaml.dump(overall_stats_dict, outfile) + with open(outputfile, "w") as file: + file.write(json.dumps(overall_stats_dict)) diff --git a/GANDLF/cli/post_training_model_optimization.py b/GANDLF/cli/post_training_model_optimization.py index 0ca261465..6dc2a4310 100644 --- a/GANDLF/cli/post_training_model_optimization.py +++ b/GANDLF/cli/post_training_model_optimization.py @@ -1,16 +1,21 @@ import os +from pathlib import Path +from typing import Optional from GANDLF.compute import create_pytorch_objects from GANDLF.config_manager import ConfigManager from GANDLF.utils import version_check, load_model, optimize_and_save_model -def post_training_model_optimization(model_path: str, 
config_path: str) -> bool: +def post_training_model_optimization( + model_path: str, config_path: Optional[str] = None, output_dir: Optional[str] = None +) -> bool: """ CLI function to optimize a model for deployment. Args: model_path (str): Path to the model file. - config_path (str): Path to the config file. + config_path (str, optional): Path to the configuration file. + output_dir (str, optional): Output directory to save the optimized model. Returns: bool: True if successful, False otherwise. @@ -26,6 +31,12 @@ def post_training_model_optimization(model_path: str, config_path: str) -> bool: else parameters ) + output_dir = os.path.dirname(model_path) if output_dir is None else output_dir + Path(output_dir).mkdir(parents=True, exist_ok=True) + optimized_model_path = os.path.join( + output_dir, os.path.basename(model_path).replace("pth.tar", "onnx") + ) + # Create PyTorch objects and set onnx_export to True for optimization model, _, _, _, _, parameters = create_pytorch_objects(parameters, device="cpu") parameters["model"]["onnx_export"] = True @@ -35,10 +46,9 @@ def post_training_model_optimization(model_path: str, config_path: str) -> bool: model.load_state_dict(main_dict["model_state_dict"]) # Optimize the model and save it to an ONNX file - optimize_and_save_model(model, parameters, model_path, onnx_export=True) + optimize_and_save_model(model, parameters, optimized_model_path, onnx_export=True) # Check if the optimized model file exists - optimized_model_path = model_path.replace("pth.tar", "onnx") if not os.path.exists(optimized_model_path): print("Error while optimizing the model.") return False diff --git a/GANDLF/compute/loss_and_metric.py b/GANDLF/compute/loss_and_metric.py index 36f78560e..23b7010ce 100644 --- a/GANDLF/compute/loss_and_metric.py +++ b/GANDLF/compute/loss_and_metric.py @@ -67,13 +67,10 @@ def get_loss_and_metrics( loss_function = global_losses_dict[list(params["loss_function"].keys())[0]] else: loss_str_lower = params["loss_function"].lower() - if loss_str_lower in global_losses_dict: - loss_function = global_losses_dict[loss_str_lower] - else: - sys.exit( - "WARNING: Could not find the requested loss function '" - + params["loss_function"] - ) + assert ( + loss_str_lower in global_losses_dict + ), f"Could not find the requested loss function '{params['loss_function']}'" + loss_function = global_losses_dict[loss_str_lower] loss = 0 # specialized loss function for sdnet diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index ea3a6e408..99497fbb1 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -1,3 +1,5 @@ +# import logging +import traceback from typing import Optional, Union import sys, yaml, ast import numpy as np @@ -478,26 +480,23 @@ def _parseConfig( # iterate through all keys for key in params["data_preprocessing"]: # iterate through all keys - # for threshold or clip, ensure min and max are defined - if not thresholdOrClip: - if key in thresholdOrClipDict: - thresholdOrClip = True # we only allow one of threshold or clip to occur and not both - # initialize if nothing is present - if not (isinstance(params["data_preprocessing"][key], dict)): - params["data_preprocessing"][key] = {} - - # if one of the required parameters is not present, initialize with lowest/highest possible values - # this ensures the absence of a field doesn't affect processing - if not "min" in params["data_preprocessing"][key]: - params["data_preprocessing"][key][ - "min" - ] = sys.float_info.min - if not "max" in 
params["data_preprocessing"][key]: - params["data_preprocessing"][key][ - "max" - ] = sys.float_info.max - elif key in thresholdOrClipDict: - sys.exit("Use only 'threshold' or 'clip', not both") + if key in thresholdOrClipDict: + # we only allow one of threshold or clip to occur and not both + assert not ( + thresholdOrClip + ), "Use only `threshold` or `clip`, not both" + thresholdOrClip = True + # initialize if nothing is present + if not (isinstance(params["data_preprocessing"][key], dict)): + params["data_preprocessing"][key] = {} + + # if one of the required parameters is not present, initialize with lowest/highest possible values + # this ensures the absence of a field doesn't affect processing + # for threshold or clip, ensure min and max are defined + if not "min" in params["data_preprocessing"][key]: + params["data_preprocessing"][key]["min"] = sys.float_info.min + if not "max" in params["data_preprocessing"][key]: + params["data_preprocessing"][key]["max"] = sys.float_info.max if key == "histogram_matching": if params["data_preprocessing"][key] is not False: @@ -618,11 +617,15 @@ def _parseConfig( params["model"]["class_list"] = temp_classList.split(",") else: try: - params["model"]["class_list"] = ast.literal_eval( - params["model"]["class_list"] - ) - except AssertionError: - raise AssertionError("Could not evaluate the 'class_list' in 'model'") + params["model"]["class_list"] = eval(params["model"]["class_list"]) + except Exception as e: + ## todo: ensure logging captures assertion errors + assert ( + False + ), f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" + # logging.error( + # f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" + # ) assert ( "nested_training" in params @@ -738,4 +741,14 @@ def ConfigManager( Returns: dict: The parameter dictionary. 
""" - return _parseConfig(config_file_path, version_check_flag) + try: + return _parseConfig(config_file_path, version_check_flag) + except Exception as e: + ## todo: ensure logging captures assertion errors + assert ( + False + ), f"Config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" + # logging.error( + # f"gandlf config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" + # ) + # raise diff --git a/GANDLF/data/ImagesFromDataFrame.py b/GANDLF/data/ImagesFromDataFrame.py index 39bc9f8cb..672e64c1d 100644 --- a/GANDLF/data/ImagesFromDataFrame.py +++ b/GANDLF/data/ImagesFromDataFrame.py @@ -17,7 +17,7 @@ get_correct_padding_size, ) from .preprocessing import get_transforms_for_preprocessing -from .augmentation import global_augs_dict +from .augmentation import get_augmentation_transforms global_sampler_dict = { "uniform": torchio.data.UniformSampler, @@ -171,7 +171,7 @@ def _save_resized_images( # if predictionHeaders: # # get the mask # if (subject_dict['label'] is None) and (class_list is not None): - # sys.exit('The \'class_list\' parameter has been defined but a label file is not present for patient: ', patient) + # logging.error('The \'class_list\' parameter has been defined but a label file is not present for patient: ', patient) if labelHeader is not None: if not os.path.isfile(str(dataframe[labelHeader][patient])): @@ -259,15 +259,16 @@ def _save_resized_images( ), f"The following subjects could not be loaded, please recheck or remove and retry: {subjects_with_error}" transformations_list = [] - - # augmentations are applied to the training set only if train and not (augmentations is None): - for aug in augmentations: - aug_lower = aug.lower() - if aug_lower in global_augs_dict: - transformations_list.append( - global_augs_dict[aug_lower](augmentations[aug]) - ) + transformations_list.extend(get_augmentation_transforms(augmentations)) + # augmentations are applied to the training set only + # if train and not (augmentations is None): + # for aug in augmentations: + # aug_lower = aug.lower() + # if aug_lower in global_augs_dict: + # transformations_list.append( + # global_augs_dict[aug_lower](augmentations[aug]) + # ) transform = get_transforms_for_preprocessing( parameters, transformations_list, train, apply_zero_crop diff --git a/GANDLF/data/augmentation/__init__.py b/GANDLF/data/augmentation/__init__.py index 22b7f1949..b3720a108 100644 --- a/GANDLF/data/augmentation/__init__.py +++ b/GANDLF/data/augmentation/__init__.py @@ -1,3 +1,7 @@ +from warnings import warn +from typing import List, Union, Dict, Callable + + from .wrap_torchio import ( mri_artifact, motion, @@ -35,3 +39,41 @@ "colorjitter": colorjitter_transform, "hed_transform": hed_transform, } + + +def get_augmentation_transforms( + augmentation_params_dict: Union[Dict[str, object], List[str]] +) -> List[Callable]: + """ + This function gets the augmentation transformations from the parameters. + + Args: + augmentation_params_dict (dict): The dictionary containing the parameters for the augmentation. + + Returns: + List[Callable]: The list of augmentation to be applied. 
+ """ + current_augmentations = [] + + # Check if user specified some augmentations without extra params + if isinstance(augmentation_params_dict, list): + for n, augmentation_type in enumerate(augmentation_params_dict): + if isinstance(augmentation_type, dict): + continue + else: + augmentation_params_dict[n] = {augmentation_type: {}} + + for augmentation_type, augmentation_params in augmentation_params_dict.items(): + augmentation_type_lower = augmentation_type.lower() + + if augmentation_type_lower in global_augs_dict: + current_augmentations.append( + global_augs_dict[augmentation_type_lower](augmentation_params) + ) + else: + warn( + f"Augmentation {augmentation_type} not found in the global augmentation dictionary.", + UserWarning, + ) + + return current_augmentations diff --git a/GANDLF/data/preprocessing/__init__.py b/GANDLF/data/preprocessing/__init__.py index cfb0907d1..9944d98b1 100644 --- a/GANDLF/data/preprocessing/__init__.py +++ b/GANDLF/data/preprocessing/__init__.py @@ -65,11 +65,7 @@ def centercrop_transform(patch_size): def rescale_transform(parameters=None): if parameters is None: parameters = {} - # get defaults from torchio - rescaler = RescaleIntensity() - rescaler.out_min_max = parameters.get("out_min_max", rescaler.out_min_max) - rescaler.percentiles = parameters.get("percentiles", rescaler.percentiles) - rescaler.in_min_max = parameters.get("in_min_max", None) + rescaler = RescaleIntensity(**parameters) return rescaler diff --git a/GANDLF/entrypoints/__init__.py b/GANDLF/entrypoints/__init__.py new file mode 100644 index 000000000..f63710217 --- /dev/null +++ b/GANDLF/entrypoints/__init__.py @@ -0,0 +1,10 @@ +from GANDLF.cli import copyrightMessage + + +def append_copyright_to_help(command_func): + command_func.__doc__ = ( + copyrightMessage + if command_func.__doc__ is None + else (command_func.__doc__ + "\n\n" + copyrightMessage) + ) + return command_func diff --git a/GANDLF/entrypoints/anonymizer.py b/GANDLF/entrypoints/anonymizer.py new file mode 100644 index 000000000..32a805846 --- /dev/null +++ b/GANDLF/entrypoints/anonymizer.py @@ -0,0 +1,133 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- +import logging +import os +import argparse +import yaml +from typing import Optional +import click +from deprecated import deprecated + +from GANDLF.anonymize import run_anonymizer +from GANDLF.cli import copyrightMessage +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logging import logger_setup + + +def _anonymize_images( + input_dir: str, output_file: str, config_path: Optional[str], modality: str +): + input_dir = os.path.normpath(input_dir) + output_file = os.path.normpath(output_file) + # TODO: raise an error if config pass provided but not exist (user made a typo?) 
+ config = None + if config_path and os.path.isfile(config_path): + config = yaml.safe_load(open(config_path, "r")) + + logging.debug(f"{input_dir=}") + logging.debug(f"{output_file=}") + logging.debug(f"{config=}") + logging.debug(f"{modality=}") + run_anonymizer(input_dir, output_file, config, modality) + + logging.info("Finished successfully.") + + +# new way of defining params via click +@click.command() +@click.option( + "--input-dir", + "-i", + required=True, + type=click.Path(exists=True), + help="Input directory or file which contains images to be anonymized.", +) +@click.option( + "--config", + "-c", + help="Config (in YAML) for running anonymization, optionally, specify modality using '-m' for defaults.", + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@click.option( + "--modality", + "-m", + default="rad", + type=click.Choice(["rad", "histo"]), + help="The modality type, can be 'rad' or 'histo'.", +) +@click.option( + "--output-file", + "-o", + required=True, + type=click.Path(), + help="Output directory or file which will contain the image(s) after anonymization.", +) +@append_copyright_to_help +def new_way(input_dir, config, modality, output_file): + """Anonymize images/scans in the data directory.""" + _anonymize_images(input_dir, output_file, config, modality) + + +# old-fashioned way of running gandlf via `gandlf_anonymizer`. +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf anonymizer` cli command " + + "instead of `gandlf_anonymizer`. Note that in new CLI tool some params were renamed:\n" + + " --inputDir to --input-dir\n" + + " --outputFile to --output-file\n" + + "`gandlf_anonymizer` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_Anonymize", + formatter_class=argparse.RawTextHelpFormatter, + description="Anonymize images/scans in the data directory.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-i", + "--inputDir", + metavar="", + type=str, + help="Input directory or file which contains images to be anonymized.", + ) + parser.add_argument( + "-c", + "--config", + metavar="", + default="", + type=str, + help="config (in YAML) for running anonymization, optionally, specify modality using '-m' for defaults.", + ) + parser.add_argument( + "-m", + "--modality", + metavar="", + default="rad", + type=str, + help="The modality type, can be 'rad' or 'histo'.", + ) + parser.add_argument( + "-o", + "--outputFile", + metavar="", + type=str, + help="Output directory or file which will contain the image(s) after anonymization.", + ) + args = parser.parse_args() + + # check for required parameters - this is needed here to keep the cli clean + for param_name in ["inputDir", "outputFile"]: + param_none_check = getattr(args, param_name) + assert param_none_check is not None, f"Missing required parameter: {param_name}" + + inputDir = args.inputDir + outputFile = args.outputFile + config = args.config or None + + _anonymize_images(inputDir, outputFile, config, args.modality) + + +# main function +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/cli_tool.py b/GANDLF/entrypoints/cli_tool.py new file mode 100644 index 000000000..1f20ff4d6 --- /dev/null +++ b/GANDLF/entrypoints/cli_tool.py @@ -0,0 +1,24 @@ +import click +from .subcommands import cli_subcommands +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup +from GANDLF import version + + +@click.group() +@click.version_option(version, 
"--version", "-v", message="GANDLF Version: %(version)s") +@click.pass_context # Pass the context to subcommands +@append_copyright_to_help +def gandlf(ctx): + """GANDLF command-line tool.""" + ctx.ensure_object(dict) + logger_setup() + + +# registers subcommands: `gandlf anonymizer`, `gandlf run`, etc. +for command_name, command in cli_subcommands.items(): + gandlf.add_command(command, command_name) + +if __name__ == "__main__": + # pylint: disable=E1120 + gandlf() diff --git a/gandlf_collectStats b/GANDLF/entrypoints/collect_stats.py similarity index 66% rename from gandlf_collectStats rename to GANDLF/entrypoints/collect_stats.py index 1a79fa127..3869dce6f 100644 --- a/gandlf_collectStats +++ b/GANDLF/entrypoints/collect_stats.py @@ -1,14 +1,20 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - +import logging import os import argparse +from typing import Optional + +import click import pandas as pd import seaborn as sns import matplotlib.pyplot as plt from pathlib import Path +from deprecated import deprecated from GANDLF.cli import copyrightMessage +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup def plot_all(df_training, df_validation, df_testing, output_plot_dir): @@ -129,7 +135,77 @@ def plot_all(df_training, df_validation, df_testing, output_plot_dir): return df_training, df_validation, df_testing -if __name__ == "__main__": +def _read_data_and_plot( + training_logs_path: str, + validation_logs_path: str, + testing_logs_path: Optional[str], + output_plot_path: str, + output_file: str, +): + # moved out from _collect_stats for easier testing + # Read all the files + df_training = pd.read_csv(training_logs_path) + df_validation = pd.read_csv(validation_logs_path) + df_testing = pd.read_csv(testing_logs_path) if testing_logs_path else None + + # Check for metrics in columns and do tight plots + plot_all(df_training, df_validation, df_testing, output_plot_path) + + df_training["split"] = "train" + df_testing["split"] = "test" + df_validation["split"] = "validation" + pd.concat((df_training, df_testing, df_validation)).to_csv(output_file) + + +def _collect_stats(model_dir: str, output_dir: str): + input_dir = os.path.normpath(model_dir) + output_dir = os.path.normpath(output_dir) + Path(output_dir).mkdir(parents=True, exist_ok=True) + output_file = os.path.join(output_dir, "data.csv") # data file name + output_plot = os.path.join(output_dir, "plot.png") # plot file + + training_logs = os.path.join(input_dir, "logs_training.csv") + validation_logs = os.path.join(input_dir, "logs_validation.csv") + testing_logs = os.path.join(input_dir, "logs_testing.csv") + if not os.path.isfile(testing_logs): + logging.info(f"testing logs file was not found: {testing_logs}") + testing_logs = None + + _read_data_and_plot( + training_logs, validation_logs, testing_logs, output_plot, output_file + ) + + +@click.command() +@click.option( + "--model-dir", + "-m", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + required=True, + help="Input directory which contains testing and validation models log files", +) +@click.option( + "--output-dir", + "-o", + type=click.Path(file_okay=False, dir_okay=True), + required=True, + help="Output directory to save stats and plot", +) +@append_copyright_to_help +def new_way(model_dir: str, output_dir: str): + """Collect statistics from different testing/validation combinations from output directory.""" + _collect_stats(model_dir=model_dir, output_dir=output_dir) + + +@deprecated( + "This is a deprecated way of 
running GanDLF. Please, use `gandlf collect-stats` cli command " + + "instead of `gandlf_collectStats`. Note that in new CLI tool params were renamed to snake-case:\n" + + " --modeldir to --model-dir\n" + + " --outputdir to --output-dir\n" + + "`gandlf_collectStats` script would be deprecated soon." +) +def old_way(): + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_CollectStats", formatter_class=argparse.RawTextHelpFormatter, @@ -152,21 +228,8 @@ def plot_all(df_training, df_validation, df_testing, output_plot_dir): ) args = parser.parse_args() + _collect_stats(args.modeldir, args.outputdir) - inputDir = os.path.normpath(args.modeldir) - outputDir = os.path.normpath(args.outputdir) - Path(outputDir).mkdir(parents=True, exist_ok=True) - outputFile = os.path.join(outputDir, "data.csv") # data file name - outputPlot = os.path.join(outputDir, "plot.png") # plot file - trainingLogs = os.path.join(inputDir, "logs_training.csv") - validationLogs = os.path.join(inputDir, "logs_validation.csv") - testingLogs = os.path.join(inputDir, "logs_testing.csv") - - # Read all the files - df_training = pd.read_csv(trainingLogs) - df_validation = pd.read_csv(validationLogs) - df_testing = pd.read_csv(testingLogs) if os.path.isfile(testingLogs) else None - - # Check for metrics in columns and do tight plots - plot_all(df_training, df_validation, df_testing, outputPlot) +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/config_generator.py b/GANDLF/entrypoints/config_generator.py new file mode 100644 index 000000000..861d80077 --- /dev/null +++ b/GANDLF/entrypoints/config_generator.py @@ -0,0 +1,85 @@ +import argparse +from deprecated import deprecated +import click + +from GANDLF.cli import config_generator, copyrightMessage +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _generate_config(config: str, strategy: str, output: str): + config_generator(config, strategy, output) + print("Finished.") + + +@click.command() +@click.option( + "--config", + "-c", + help="Path to base config.", + required=True, + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@click.option( + "--strategy", + "-s", + type=click.Path(exists=True, file_okay=True, dir_okay=False), + required=True, + help="Config creation strategy in a yaml format.", +) +@click.option( + "--output", + "-o", + required=True, + type=click.Path(file_okay=False, dir_okay=True), + help="Path to output directory.", +) +@append_copyright_to_help +def new_way(config, strategy, output): + """Generate multiple GaNDLF configurations based on a single baseline GaNDLF for experimentation.""" + _generate_config(config, strategy, output) + + +# old-fashioned way of running gandlf via `gandlf_configGenerator`. +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf config-generator` cli command " + + "instead of `gandlf_configGenerator`.\n" + + "`gandlf_configGenerator` script would be deprecated soon." 
+) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_ConfigGenerator", + formatter_class=argparse.RawTextHelpFormatter, + description="Generate multiple GaNDLF configurations based on a single baseline GaNDLF for experimentation.\n\n" + + copyrightMessage, + ) + + parser.add_argument( + "-c", + "--config", + metavar="", + type=str, + help="Path to base config.", + required=True, + ) + parser.add_argument( + "-s", + "--strategy", + metavar="", + type=str, + help="Config creation strategy in a yaml format.", + required=True, + ) + parser.add_argument( + "-o", + "--output", + metavar="", + type=str, + help="Path to output directory.", + required=True, + ) + + args = parser.parse_args() + + _generate_config(args.config, args.strategy, args.output) diff --git a/GANDLF/entrypoints/construct_csv.py b/GANDLF/entrypoints/construct_csv.py new file mode 100644 index 000000000..b632d28fb --- /dev/null +++ b/GANDLF/entrypoints/construct_csv.py @@ -0,0 +1,180 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- +import logging +import os +import argparse +import ast +from typing import Optional +import yaml +import click +from deprecated import deprecated + +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import writeTrainingCSV + +from GANDLF.cli import copyrightMessage +from GANDLF.utils import logger_setup + + +def _construct_csv( + input_dir: str, + channels_id: str, + label_id: Optional[str], + output_file: str, + relativize_paths_to_output: bool, +): + input_dir = os.path.normpath(input_dir) + output_file = os.path.normpath(output_file) + + # Do some special handling for if users pass a yml file for channel/label IDs + # This is used for MLCube functionality because MLCube does not support plain string inputs. + if channels_id.endswith(".yml") or channels_id.endswith(".yaml"): + if os.path.isfile(channels_id): + with open(channels_id, "r") as f: + content = yaml.safe_load(f) + channels_id = content["channels"] + if isinstance(channels_id, list): + channels_id = ",".join(channels_id) + + # TODO: raise a warning if label param is both passed as arg and defined in file + if "label" in content: + label_id = content["label"] + if isinstance(label_id, list): # TODO: it can be really a list? + label_id = ",".join(label_id) + + logging.debug(f"{input_dir=}") + logging.debug(f"{channels_id=}") + logging.debug(f"{label_id=}") + logging.debug(f"{output_file=}") + logging.debug(f"{relativize_paths_to_output=}") + + writeTrainingCSV( + input_dir, channels_id, label_id, output_file, relativize_paths_to_output + ) + + +@click.command() +@click.option( + "--input-dir", + "-i", + required=True, + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Input data directory which contains images in specified format", +) +@click.option( + "--channels-id", + "-c", + required=True, + help="Channels/modalities identifier string to check for in all files in 'input_dir'; for example: " + "--channels-id _t1.nii.gz,_t2.nii.gz. May be a YAML file with `channels` list of suffixes", + type=str, +) +@click.option( + "--label-id", + "-l", + type=str, + help="Label/mask identifier string to check for in all files in 'input_dir'; for example: " + "--label-id _seg.nii.gz. 
Param value is ignored in `label` is defined in channels YAML file", +) +@click.option( + "--output-file", + "-o", + required=True, + type=click.Path(file_okay=True, dir_okay=False), + help="Output CSV file", +) +@click.option( + "--relativize-paths", + "-r", + is_flag=True, + help="If True, paths in the output data CSV will always be relative to the location" + " of the output data CSV itself.", +) +@append_copyright_to_help +def new_way( + input_dir: str, + channels_id: str, + label_id: Optional[str], + output_file: str, + relativize_paths: bool, +): + """Generate training/inference CSV from data directory.""" + _construct_csv( + input_dir=input_dir, + channels_id=channels_id, + label_id=label_id, + output_file=output_file, + relativize_paths_to_output=relativize_paths, + ) + + +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf construct-csv` cli command " + + "instead of `gandlf_constructCSV`. Note that in new CLI tool some params were renamed:\n" + + " --inputDir to --input-dir\n" + + " --channelsID to --channels-id\n" + + " --labelID to --label-id\n" + + " --outputFile to --output-file\n" + + " --relativizePaths to --relativize-paths and converted to flag, i.e. no value required\n" + + "`gandlf_constructCSV` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_ConstructCSV", + formatter_class=argparse.RawTextHelpFormatter, + description="Generate training/inference CSV from data directory.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-i", + "--inputDir", + metavar="", + type=str, + help="Input data directory which contains images in specified format", + ) + parser.add_argument( + "-c", + "--channelsID", + metavar="", + type=str, + help="Channels/modalities identifier string to check for in all files in 'input_dir'; for example: --channelsID _t1.nii.gz,_t2.nii.gz", + ) + parser.add_argument( + "-l", + "--labelID", + default=None, + type=str, + help="Label/mask identifier string to check for in all files in 'input_dir'; for example: --labelID _seg.nii.gz", + ) + parser.add_argument( + "-o", "--outputFile", metavar="", type=str, help="Output CSV file" + ) + parser.add_argument( + "-r", + "--relativizePaths", + metavar="", + type=ast.literal_eval, + default=False, + help="If True, paths in the output data CSV will always be relative to the location of the output data CSV itself.", + ) + + args = parser.parse_args() + + # check for required parameters - this is needed here to keep the cli clean + for param_name in ["inputDir", "channelsID", "outputFile"]: + param_none_check = getattr(args, param_name) + assert param_none_check is not None, f"Missing required parameter: {param_name}" + + _construct_csv( + input_dir=args.inputDir, + channels_id=args.channelsID, + label_id=args.labelID, + output_file=args.outputFile, + relativize_paths_to_output=args.relativizePaths, + ) + + +# main function +if __name__ == "__main__": + old_way() diff --git a/gandlf_debugInfo b/GANDLF/entrypoints/debug_info.py similarity index 53% rename from gandlf_debugInfo rename to GANDLF/entrypoints/debug_info.py index e3de0bdf1..a179513a1 100644 --- a/gandlf_debugInfo +++ b/GANDLF/entrypoints/debug_info.py @@ -1,12 +1,16 @@ #!usr/bin/env python # -*- coding: utf-8 -*- import platform +from deprecated import deprecated +import click from GANDLF import __version__ +from GANDLF.entrypoints import append_copyright_to_help from GANDLF.utils import get_git_hash +from GANDLF.utils import logger_setup -if __name__ == 
"__main__": +def _debug_info(): print(f"GANDLF version: {__version__}") print(f"Git hash: {get_git_hash()}") print(f"Platform: {platform.platform()}") @@ -18,3 +22,25 @@ print(f" Implementation: {platform.python_implementation()}") print(f" Compiler: {platform.python_compiler()}") print(f" Build: {(' ').join(list(platform.python_build()))}") + + +@click.command() +@append_copyright_to_help +def new_way(): + """Displays detailed info about system environment: library versions, settings, etc.""" + _debug_info() + + +# main function +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf debug-info` cli command " + + "instead of `gandlf_debugInfo`.\n" + + "`gandlf_debugInfo` script would be deprecated soon." +) +def old_way(): + _debug_info() + logger_setup() + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/deploy.py b/GANDLF/entrypoints/deploy.py new file mode 100644 index 000000000..f24a25cf1 --- /dev/null +++ b/GANDLF/entrypoints/deploy.py @@ -0,0 +1,246 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import ast +import os +from typing import Optional + +import click +from deprecated import deprecated +from GANDLF.cli import ( + deploy_targets, + mlcube_types, + run_deployment, + recover_config, + copyrightMessage, +) +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _deploy( + model: Optional[str], + config: Optional[str], + target: str, + mlcube_type: str, + mlcube_root: str, + output_dir: str, + requires_gpu: bool, + entrypoint: Optional[str], +): + os.makedirs(output_dir, exist_ok=True) + + default_config = os.path.join(output_dir, "original_config.yml") + if not config and mlcube_type == "model": + result = recover_config(model, default_config) + assert ( + result + ), "Error: No config was specified but automatic config extraction failed." + config = default_config + + if not model and mlcube_type == "model": + raise AssertionError( + "Error: a path to a model directory should be provided when deploying a model" + ) + print(f"{mlcube_root=}") + print(f"{output_dir=}") + print(f"{target=}") + print(f"{mlcube_type=}") + print(f"{entrypoint=}") + print(f"{config=}") + print(f"{model=}") + print(f"{requires_gpu=}") + + result = run_deployment( + mlcubedir=mlcube_root, + outputdir=output_dir, + target=target, + mlcube_type=mlcube_type, + entrypoint_script=entrypoint, + configfile=config, + modeldir=model, + requires_gpu=requires_gpu, + ) + + assert result, "Deployment to the target platform failed." + + +@click.command() +@click.option( + "--model", + "-m", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to the model directory you wish to deploy. Required for model MLCubes, " + "ignored for metrics MLCubes.", +) +@click.option( + "--config", + "-c", + help="Optional path to an alternative config file to be embedded with the model. " + "If blank/default, we use the previous config from the model instead. " + "Only relevant for model MLCubes. 
Ignored for metrics MLCubes", + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@click.option( + "--target", + "-t", + required=True, + type=click.Choice(deploy_targets), + help="The target platform.", +) +@click.option( + "--mlcube-type", + type=click.Choice(mlcube_types), + required=True, + help="The mlcube type.", +) +@click.option( + "--mlcube-root", + "-r", + required=True, + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Path to an alternative MLCUBE_ROOT directory to use as a template. The source " + "repository contains an example (https://github.com/mlcommons/GaNDLF/tree/master/mlcube).", +) +@click.option( + "--output-dir", + "-o", + required=True, + help="Output directory path. " + "For MLCube builds, generates an MLCube directory to be distributed with your MLCube.", + type=click.Path(file_okay=False, dir_okay=True), +) +@click.option( + "--requires-gpu/--no-gpu", + "-g", + is_flag=True, + default=True, + help="True if the model requires a GPU by default, False otherwise. " + "Only relevant for model MLCubes. Ignored for metrics MLCubes", +) +@click.option( + "--entrypoint", + "-e", + type=click.Path(exists=True, file_okay=True, dir_okay=False), + help="An optional custom python entrypoint script to use instead of the default specified in mlcube.yaml." + " (Only for inference and metrics)", +) +@append_copyright_to_help +def new_way( + model: Optional[str], + config: Optional[str], + target: str, + mlcube_type: str, + mlcube_root: str, + output_dir: str, + requires_gpu: bool, + entrypoint: Optional[str], +): + """Generate frozen/deployable versions of trained GaNDLF models.""" + _deploy( + model=model, + config=config, + target=target, + mlcube_type=mlcube_type, + mlcube_root=mlcube_root, + output_dir=output_dir, + requires_gpu=requires_gpu, + entrypoint=entrypoint, + ) + + +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf deploy` cli command " + + "instead of `gandlf_deploy`. Note that in new CLI tool some params were renamed or changed its behavior:\n" + + " --outputdir to --output-dr\n" + + " --requires-gpu/-g now works as flag: True by default or if flag is passed. To disable gpu, use `--no-gpu` option\n" + + "`gandlf_deploy` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_Deploy", + formatter_class=argparse.RawTextHelpFormatter, + description="Generate frozen/deployable versions of trained GaNDLF models.\n\n" + + copyrightMessage, + ) + + parser.add_argument( + "-m", + "--model", + metavar="", + type=str, + help="Path to the model directory you wish to deploy. Required for model MLCubes, ignored for metrics MLCubes.", + default=None, + ) + parser.add_argument( + "-c", + "--config", + metavar="", + type=str, + default=None, + help="Optional path to an alternative config file to be embedded with the model. If blank/default, we use the previous config from the model instead. Only relevant for model MLCubes. Ignored for metrics MLCubes", + ) + parser.add_argument( + "-t", + "--target", + metavar="", + type=str, + help="The target platform. Valid inputs are: " + + ", ".join(deploy_targets) + + " .", + required=True, + ) + parser.add_argument( + "--mlcube-type", + metavar="", + type=str, + help="The mlcube type. 
Valid inputs are: " + ", ".join(mlcube_types) + " .", + required=True, + ) + parser.add_argument( + "-r", + "--mlcube-root", + metavar="", + type=str, + required=True, + help="Path to an alternative MLCUBE_ROOT directory to use as a template. The source repository contains an example (https://github.com/mlcommons/GaNDLF/tree/master/mlcube).", + ) + parser.add_argument( + "-o", + "--outputdir", + metavar="", + type=str, + help="Output directory path. For MLCube builds, generates an MLCube directory to be distributed with your MLCube.", + required=True, + ) + parser.add_argument( + "-g", + "--requires-gpu", + metavar="", + type=ast.literal_eval, + help="True if the model requires a GPU by default, False otherwise. Only relevant for model MLCubes. Ignored for metrics MLCubes", + default=True, + ) + parser.add_argument( + "-e", + "--entrypoint", + metavar="", + type=str, + help="An optional custom python entrypoint script to use instead of the default specified in mlcube.yaml. (Only for inference and metrics)", + default=None, + ) + + args = parser.parse_args() + + _deploy( + model=args.model, + config=args.config, + target=args.target, + mlcube_type=args.mlcube_type, + mlcube_root=args.mlcube_root, + output_dir=args.outputdir, + requires_gpu=args.requires_gpu, + entrypoint=args.entrypoint, + ) diff --git a/GANDLF/entrypoints/generate_metrics.py b/GANDLF/entrypoints/generate_metrics.py new file mode 100644 index 000000000..5d589a9f0 --- /dev/null +++ b/GANDLF/entrypoints/generate_metrics.py @@ -0,0 +1,151 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import click +from deprecated import deprecated +from typing import Optional + +from GANDLF import version +from GANDLF.cli import copyrightMessage +from GANDLF.cli.generate_metrics import generate_metrics_dict +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _generate_metrics( + input_data: str, + config: str, + output_file: Optional[str], + missing_prediction: int = -1, +): + generate_metrics_dict(input_data, config, output_file, missing_prediction) + print("Finished.") + + +@click.command() +@click.option( + "--config", + "-c", + required=True, + help="The configuration file (contains all the information related to the training/inference session)", + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@click.option( + "--input-data", + "-i", + required=True, + type=str, + help="The CSV file of input data that is used to generate the metrics; " + "should contain 3 columns: 'SubjectID,Target,Prediction'", +) +@click.option( + "--output-file", + "-o", + type=click.Path(file_okay=True, dir_okay=False), + help="Location to save the output dictionary. If not provided, will print to stdout.", +) +@click.option( + "--missing-prediction", + "-m", + required=False, + type=int, + default=-1, + help="The value to use for missing predictions as penalty; if `-1`, this does not get added. This is only used in the case where the targets and predictions are passed independently.", +) +@click.option("--raw-input", hidden=True) +@append_copyright_to_help +def new_way( + config: str, + input_data: str, + output_file: Optional[str], + missing_prediction: int, + raw_input: str, +): + """Metrics calculator.""" + _generate_metrics( + input_data=input_data, + config=config, + output_file=output_file, + missing_prediction=missing_prediction, + ) + + +@deprecated( + "This is a deprecated way of running GanDLF. 
Please, use `gandlf generate-metrics` cli command " + + "instead of `gandlf_generateMetrics`. Note that in new CLI tool some params were renamed or " + "changed its behavior:\n" + + " --parameters_file to --config\n" + + " --inputdata/--data_path to --input-data\n" + + " --outputfile/--output_path to --output-file\n" + + " --version removed; use `gandlf --version` instead\n" + + "`gandlf_generateMetrics` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_Metrics", + formatter_class=argparse.RawTextHelpFormatter, + description="Metrics calculator.\n\n" + copyrightMessage, + ) + parser.add_argument( + "-c", + "--config", + "--parameters_file", + metavar="", + type=str, + required=True, + help="The configuration file (contains all the information related to the training/inference session)", + ) + parser.add_argument( + "-i", + "--inputdata", + "--data_path", + metavar="", + type=str, + required=True, + help="The CSV file of input data that is used to generate the metrics; should contain 3 columns: 'SubjectID,Target,Prediction'", + ) + parser.add_argument( + "-o", + "--outputfile", + "--output_path", + metavar="", + type=str, + default=None, + help="Location to save the output dictionary. If not provided, will print to stdout.", + ) + parser.add_argument( + "-m", + "--missingprediction", + metavar="", + type=int, + default=-1, + help="The value to use for missing predictions as penalty; if `-1`, this does not get added. This is only used in the case where the targets and predictions are passed independently.", + ) + parser.add_argument( + "-v", + "--version", + action="version", + version="%(prog)s v{}".format(version) + "\n\n" + copyrightMessage, + help="Show program's version number and exit.", + ) + + # This is a dummy argument that exists to trigger MLCube mounting requirements. + # Do not remove. 
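The `--input-data` CSV consumed by `gandlf generate-metrics` is described above as containing the three columns SubjectID, Target, Prediction. A small sketch of preparing such a file with pandas; the file names and values below are made up for illustration:

import pandas as pd

# Hypothetical predictions for a three-subject classification run.
rows = [
    {"SubjectID": "sub-001", "Target": 1, "Prediction": 1},
    {"SubjectID": "sub-002", "Target": 0, "Prediction": 1},
    {"SubjectID": "sub-003", "Target": 2, "Prediction": 2},
]
pd.DataFrame(rows).to_csv("predictions.csv", index=False)

# The resulting file can then be passed to the new CLI, for example:
#   gandlf generate-metrics -c config.yaml -i predictions.csv -o metrics.json
# (config.yaml and metrics.json are placeholder names; -o may be omitted to print to stdout.)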
+ parser.add_argument("-rawinput", "--rawinput", help=argparse.SUPPRESS) + + args = parser.parse_args() + assert args.config is not None, "Missing required parameter: config" + assert args.inputdata is not None, "Missing required parameter: inputdata" + + _generate_metrics( + input_data=args.inputdata, + config=args.config, + output_file=args.outputfile, + missing_prediction=args.missingprediction, + ) + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/optimize_model.py b/GANDLF/entrypoints/optimize_model.py new file mode 100644 index 000000000..021f4d65f --- /dev/null +++ b/GANDLF/entrypoints/optimize_model.py @@ -0,0 +1,106 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +from typing import Optional + +from deprecated import deprecated +import click + +from GANDLF.cli import copyrightMessage, post_training_model_optimization +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _optimize_model( + model: str, config: Optional[str], output_path: Optional[str] = None +): + if post_training_model_optimization( + model_path=model, config_path=config, output_path=output_path + ): + print("Post-training model optimization successful.") + else: + print("Post-training model optimization failed.") + + +@click.command() +@click.option( + "--model", + "-m", + type=click.Path(exists=True, file_okay=True, dir_okay=False), + required=True, + help="Path to the model file (ending in '.pth.tar') you wish to optimize.", +) +@click.option( + "--output-path", + "-o", + type=click.Path(file_okay=False, dir_okay=True), + required=False, + help="Location to save the optimized model, defaults to location of `model`", +) +@click.option( + "--config", + "-c", + help="The configuration file (contains all the information related to the training/inference session)." + "Arg value is used if no config in model is found.", + required=False, + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@append_copyright_to_help +def new_way( + model: str, config: Optional[str] = None, output_path: Optional[str] = None +): + """Generate optimized versions of trained GaNDLF models.""" + _optimize_model(model=model, config=config, output_path=output_path) + + +# old-fashioned way of running gandlf via `gandlf_optimizeModel`. +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf optimize-model` cli command " + + "instead of `gandlf_optimizeModel`.\n" + + "`gandlf_optimizeModel` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_OptimizeModel", + formatter_class=argparse.RawTextHelpFormatter, + description="Generate optimized versions of trained GaNDLF models.\n\n" + + copyrightMessage, + ) + + parser.add_argument( + "-m", + "--model", + metavar="", + type=str, + help="Path to the model file (ending in '.pth.tar') you wish to optimize.", + required=True, + ) + parser.add_argument( + "-o", + "--outputdir", + "--output_path", + metavar="", + type=str, + default=None, + help="Location to save the optimized model, defaults to location of `model`", + required=False, + ) + parser.add_argument( + "-c", + "--config", + metavar="", + type=str, + default=None, + required=False, + help="The configuration file (contains all the information related to the training/inference session). 
" + "Arg value is used if no config in model is found.", + ) + + args = parser.parse_args() + _optimize_model(model=args.model, config=args.config, output_path=args.outputdir) + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/patch_miner.py b/GANDLF/entrypoints/patch_miner.py new file mode 100644 index 000000000..58b041129 --- /dev/null +++ b/GANDLF/entrypoints/patch_miner.py @@ -0,0 +1,98 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import logging +from typing import Optional +from deprecated import deprecated +import click + +from GANDLF.cli.patch_extraction import patch_extraction +from GANDLF.cli import copyrightMessage +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _mine_patches(input_path: str, output_dir: str, config: Optional[str]): + patch_extraction(input_path, output_dir, config) + logging.info("Finished.") + + +@click.command() +@click.option( + "--input-csv", + "-i", # TODO: check - really csv only fits? + # TODO: should we rename it to --input-path? + type=click.Path(exists=True, file_okay=True, dir_okay=False), + required=True, + help="input path for the tissue", +) +@click.option( + "--output-dir", + "-o", + type=click.Path(file_okay=False, dir_okay=True), + required=True, + help="output directory for the patches", +) +@click.option( + "--config", + "-c", + type=click.Path(exists=True, file_okay=True, dir_okay=False), + required=False, + help="config (in YAML) for running the patch miner. Needs 'scale' and 'patch_size' to be defined, " + "otherwise defaults to 16 and (256, 256), respectively.", +) +@append_copyright_to_help +def new_way(input_csv: str, output_dir: str, config: Optional[str]): + """Construct patches from whole slide image(s).""" + _mine_patches(input_path=input_csv, output_dir=output_dir, config=config) + + +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf patch-miner` cli command " + + "instead of `gandlf_patchMiner`. Note that in new CLI tool some params were renamed to snake-case:\n" + + " --input_CSV to --input-csv\n" + + " --output_path to --output-path\n" + + "`gandlf_patchMiner` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_PatchMiner", + formatter_class=argparse.RawTextHelpFormatter, + description="Construct patches from whole slide image(s).\n\n" + + copyrightMessage, + ) + + parser.add_argument( + "-i", + "--input_CSV", + dest="input_path", + help="input path for the tissue", + required=True, + ) + parser.add_argument( + "-o", + "--output_path", + dest="output_path", + default=None, + required=True, + help="output path for the patches", + ) + parser.add_argument( + "-c", + "--config", + type=str, + dest="config", + help="config (in YAML) for running the patch miner. 
Needs 'scale' and 'patch_size' to be defined, otherwise defaults to 16 and (256, 256), respectively.", + required=False, + ) + + args = parser.parse_args() + _mine_patches( + input_path=args.input_path, output_dir=args.output_path, config=args.config + ) + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/preprocess.py b/GANDLF/entrypoints/preprocess.py new file mode 100644 index 000000000..265512c11 --- /dev/null +++ b/GANDLF/entrypoints/preprocess.py @@ -0,0 +1,182 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import ast +import logging + +import click +from deprecated import deprecated +from GANDLF.cli import preprocess_and_save, copyrightMessage +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _preprocess( + config: str, + input_data: str, + output_dir: str, + label_pad: str, + apply_augs: bool, + crop_zero: bool, +): + print(f"{config=}") + print(f"{input_data=}") + print(f"{output_dir=}") + print(f"{label_pad=}") + print(f"{apply_augs=}") + print(f"{crop_zero=}") + preprocess_and_save( + data_csv=input_data, + config_file=config, + output_dir=output_dir, + label_pad_mode=label_pad, + applyaugs=apply_augs, + apply_zero_crop=crop_zero, + ) + + # TODO: in `old_way` default logging level is warning, thus those 'finished' are not printed anymore + logging.info("Finished.") + + +@click.command() +@click.option( + "--config", + "-c", + type=click.Path(exists=True, file_okay=True, dir_okay=False), + required=True, + help="The configuration file (contains all the information related to the training/inference session)," + " this is read from 'output' during inference", +) +@click.option( + "--input-data", + "-i", # TODO: mention pickled df also fits + type=click.Path(exists=True, file_okay=True, dir_okay=False), + required=True, + help="Data csv file that is used for training/inference", +) +@click.option( + "--output-dir", + "-o", + type=click.Path(file_okay=False, dir_okay=True), + required=True, + help="Output directory to save intermediate files and model weights", +) +@click.option( + "--label-pad", + "-l", + type=str, + default="constant", + help="Specifies the padding strategy for the label when 'patch_sampler' is 'label'. " + "Defaults to 'constant' [full list: https://numpy.org/doc/stable/reference/generated/numpy.pad.html]", +) +@click.option( + "--apply-augs", + "-a", + is_flag=True, + help="If passed, applies data augmentations during output creation", +) +@click.option( + "--crop-zero", + "-z", + is_flag=True, + help="If passed, applies zero cropping during output creation.", +) +@append_copyright_to_help +def new_way( + config: str, + input_data: str, + output_dir: str, + label_pad: str, + apply_augs: bool, + crop_zero: bool, +): + """Generate training/inference data which are preprocessed to reduce resource footprint during computation.""" + _preprocess( + config=config, + input_data=input_data, + output_dir=output_dir, + label_pad=label_pad, + apply_augs=apply_augs, + crop_zero=crop_zero, + ) + + +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf preprocess` cli command " + + "instead of `gandlf_preprocess`. Note that in new CLI tool some params were renamed to snake-case:\n" + + " --inputdata to --input-data\n" + + " --labelPad to --label-pad\n" + + " --applyaugs to --apply-augs; it is flag now, i.e. no value accepted\n" + + " --cropzero to --crop-zero; it is flag now, i.e. 
no value accepted\n" + + "`gandlf_preprocess` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_Preprocess", + formatter_class=argparse.RawTextHelpFormatter, + description="Generate training/inference data which are preprocessed to reduce resource footprint during computation.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-c", + "--config", + metavar="", + type=str, + help="The configuration file (contains all the information related to the training/inference session), this is read from 'output' during inference", + required=True, + ) + parser.add_argument( + "-i", + "--inputdata", + metavar="", + type=str, + help="Data csv file that is used for training/inference", + required=True, + ) + parser.add_argument( + "-o", + "--output", + metavar="", + type=str, + help="Output directory to save intermediate files and model weights", + required=True, + ) + parser.add_argument( + "-l", + "--labelPad", + metavar="", + type=str, + default="constant", + help="This specifies the padding strategy for the label when 'patch_sampler' is 'label'. Defaults to 'constant' [full list: https://numpy.org/doc/stable/reference/generated/numpy.pad.html]", + required=False, + ) + parser.add_argument( + "-a", + "--applyaugs", + metavar="", + type=ast.literal_eval, + default=False, + help="This specifies the whether to apply data augmentation during output creation. Defaults to False", + required=False, + ) + parser.add_argument( + "-z", + "--cropzero", + metavar="", + type=ast.literal_eval, + default=False, + help="This specifies the whether to apply zero cropping during output creation. Defaults to False", + required=False, + ) + + args = parser.parse_args() + _preprocess( + config=args.config, + input_data=args.inputdata, + output_dir=args.output, + label_pad=args.labelPad, + apply_augs=args.applyaugs, + crop_zero=args.cropzero, + ) diff --git a/GANDLF/entrypoints/recover_config.py b/GANDLF/entrypoints/recover_config.py new file mode 100644 index 000000000..6168b2ad1 --- /dev/null +++ b/GANDLF/entrypoints/recover_config.py @@ -0,0 +1,104 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +from typing import Optional + +import click +from deprecated import deprecated + +from GANDLF.cli import copyrightMessage, recover_config +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _recover_config(model_dir: Optional[str], mlcube: bool, output_file: str): + if mlcube: + search_dir = "/embedded_model/" + else: + search_dir = model_dir + + print(f"{model_dir=}") + print(f"{mlcube=}") + print(f"{search_dir=}") + print(f"{output_file=}") + result = recover_config(search_dir, output_file) + assert result, "Config file recovery failed." + + +@click.command() +@click.option( + "--model-dir", + "-m", + help="Path to the model directory.", + type=click.Path(exists=True, file_okay=False, dir_okay=True), +) +@click.option( + "--mlcube", + "-c", + is_flag=True, + help="Pass this option to attempt to extract the config from the embedded model in a GaNDLF MLCube " + "(if any). Only useful in that context. If passed, model-dir param is ignored.", +) +@click.option( + "--output-file", + "-o", + required=True, + type=click.Path(file_okay=True, dir_okay=False), + help="Path to an output file where the config will be written.", +) +@append_copyright_to_help +def new_way(model_dir, mlcube, output_file): + """Recovers a config file from a GaNDLF model. 
If used from within a deployed GaNDLF MLCube, + attempts to extract the config from the embedded model.""" + _recover_config(model_dir=model_dir, mlcube=mlcube, output_file=output_file) + + +# old-fashioned way of running gandlf via `gandlf_recoverConfig`. +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf recover-config` cli command " + + "instead of `gandlf_recoverConfig`. Note that in new CLI tool some params were renamed or changed its behavior:\n" + + " --modeldir to --model-dir\n" + + " --mlcube is flag now with default False if not passed. Does not require to pass any additional values\n" + + " --outputFile to --output-file`\n" + + "`gandlf_recoverConfig` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_RecoverConfig", + formatter_class=argparse.RawTextHelpFormatter, + description="Recovers a config file from a GaNDLF model. If used from within a deployed GaNDLF MLCube, attempts to extract the config from the embedded model.\n\n" + + copyrightMessage, + ) + + parser.add_argument( + "-m", "--modeldir", metavar="", type=str, help="Path to the model directory." + ) + # TODO: despite of `str` type, real value is never used (only checks if it is filled or not) + # Thus, caveats: + # * passing `--mlcube False` would still process it as mlcube; + # * passing `--mlcube "" ` (with empty str) acts as non-mlcube + parser.add_argument( + "-c", + "--mlcube", + metavar="", + type=str, + help="Pass this option to attempt to extract the config from the embedded model in a GaNDLF MLCube (if any). Only useful in that context. If passed, model-dir param is ignored.", + ) + parser.add_argument( + "-o", + "--outputFile", + metavar="", + type=str, + required=True, + help="Path to an output file where the config will be written.", + ) + + args = parser.parse_args() + + _recover_config(args.modeldir, args.mlcube, args.outputFile) + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/run.py b/GANDLF/entrypoints/run.py new file mode 100644 index 000000000..f5aa73b1b --- /dev/null +++ b/GANDLF/entrypoints/run.py @@ -0,0 +1,286 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- +import logging +import os +import argparse +import ast + +# import traceback +from typing import Optional + +from deprecated import deprecated +import click + +from GANDLF import version +from GANDLF.cli import main_run, copyrightMessage +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _run( + config: str, + input_data: str, + train_flag: bool, + model_dir: str, + device: str, + reset_flag: bool, + resume_flag: bool, + output_path: Optional[str], +): + if model_dir is None and output_path: + model_dir = output_path + + assert model_dir is not None, "Missing required parameter: model_dir" + + if os.path.isdir(input_data): + # Is this a fine assumption to make? + # Medperf models receive the data generated by the data preparator mlcube + # We can therefore ensure the output of that mlcube contains a data.csv file + filename = "data.csv" + input_data = os.path.join(input_data, filename) + + if not train_flag: + # TODO: print a warning if any of these flags is activated. They are not available for inference mode + # Maybe user misconfigured the command. 
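The TODO above the old `--mlcube` argument in recover_config.py notes that only the truthiness of the passed string is checked by `_recover_config`, so `--mlcube False` still selects the embedded-model path while only an empty string disables it. A short, GaNDLF-independent sketch of why:

# Any non-empty string is truthy in Python, so a string-typed "boolean" flag
# cannot be switched off by passing the word False.
for value in ["True", "False", "0", ""]:
    print(repr(value), "->", bool(value))
# 'True'  -> True
# 'False' -> True
# '0'     -> True
# ''      -> False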
+ # if inference mode, then no need to check for reset/resume + reset_flag, resume_flag = False, False + + if reset_flag and resume_flag: + logging.warning( + "'reset' and 'resume' are mutually exclusive; 'resume' will be used." + ) + reset_flag = False + + # TODO: check that output_path is not passed in training mode; + # maybe user misconfigured the command + + logging.debug(f"{config=}") + logging.debug(f"{input_data=}") + logging.debug(f"{train_flag=}") + logging.debug(f"{model_dir=}") + logging.debug(f"{device=}") + logging.debug(f"{reset_flag=}") + logging.debug(f"{resume_flag=}") + logging.debug(f"{output_path=}") + + main_run( + data_csv=input_data, + config_file=config, + model_dir=model_dir, + train_mode=train_flag, + device=device, + resume=resume_flag, + reset=reset_flag, + output_dir=output_path, + ) + print("Finished.") + + +@click.command() +@click.option( + "--config", + "-c", + required=True, + help="The configuration file (contains all the information related to the training/inference session)", + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@click.option( + "--input-data", + "-i", + required=True, + type=str, + help="Data CSV file that is used for training/inference; " + "can also take comma-separated training-validation pre-split CSVs;" + "can also take a path to folder with `data.csv`", +) +@click.option( + "--train/--infer", + "-t/--infer", + required=True, # TODO: what if we make infer as default behavior if no param is passed? + help="If we run training or inference; for inference, " + "there needs to be a compatible model saved in '-model-dir'", +) +@click.option( + "--model-dir", + "-m", + type=click.Path(file_okay=False, dir_okay=True), + help="Training: Output directory to save intermediate files and model weights; " + "inference: location of previous training session output", +) +@click.option( + "--device", + "-d", + # TODO: Not sure it's worth to restrict this list. What about other devices? + # GaNDLF guarantees to work properly with these two options, but + # other values may be partially working also. + # * GaNDLF code convert `-1` to `cpu` (i.e. it is expected somebody may pass -1) + # * `cuda:0` should work also, isn't it? Just would not be treated as `cuda` + # * Would `mps` work? + # * int values (like `1`) - are they supported? (legacy mode for cuda https://pytorch.org/docs/stable/tensor_attributes.html#torch-device) + type=click.Choice(["cuda", "cpu"]), + required=True, # FIXME: either keep default value, or set required flag + help="Device to perform requested session on 'cpu' or 'cuda'; " + "for cuda, ensure CUDA_VISIBLE_DEVICES env var is set", +) +@click.option( + "--reset", + "-rt", + is_flag=True, + help="Completely resets the previous run by deleting 'model-dir'", +) +@click.option( + "--resume", + "-rm", + is_flag=True, + help="Resume previous training by only keeping model dict in 'model-dir'", +) +@click.option( + "--output-path", + "-o", + type=click.Path(file_okay=False, dir_okay=True), + help="Location to save the output of the inference session. 
Not used for training.", +) +@click.option("--raw-input", hidden=True) +@append_copyright_to_help +def new_way( + config: str, + input_data: str, + train: bool, + model_dir: str, + device: str, + reset: bool, + resume: bool, + output_path: str, + raw_input: str, +): + """Semantic segmentation, regression, and classification for medical images using Deep Learning.""" + _run( + config=config, + input_data=input_data, + train_flag=train, + model_dir=model_dir, + device=device, + reset_flag=reset, + resume_flag=resume, + output_path=output_path, + ) + + +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf run` cli command " + + "instead of `gandlf_run`. Note that in new CLI tool some params were renamed or changed its behavior:\n" + + " --parameters_file to --config\n" + + " --inputdata/--data_path to --input-data\n" + + " --train changed its behavior: instead of `--train True/False` pass `--train/--infer`\n" + + " --modeldir to --model-dir\n" + + " --reset is flag now with default False value if not passed\n" + + " --resume is flag now with default False value if not passed\n" + + " --outputdir/--output_path to --output-path; in training mode, use --model-dir instead\n" + + " --version removed; use `gandlf --version` instead\n" + + "`gandlf_run` script would be deprecated soon." +) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF", + formatter_class=argparse.RawTextHelpFormatter, + description="Semantic segmentation, regression, and classification for medical images using Deep Learning.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-c", + "--config", + "--parameters_file", + metavar="", + type=str, + required=True, + help="The configuration file (contains all the information related to the training/inference session)", + ) + parser.add_argument( + "-i", + "--inputdata", + "--data_path", + metavar="", + type=str, + required=True, + help="Data CSV file that is used for training/inference; can also take comma-separated training-validation pre-split CSVs", + ) + parser.add_argument( + "-t", + "--train", + metavar="", + type=ast.literal_eval, + required=True, + help="True: training and False: inference; for inference, there needs to be a compatible model saved in '-modeldir'", + ) + parser.add_argument( + "-m", + "--modeldir", + metavar="", + type=str, + help="Training: Output directory to save intermediate files and model weights; inference: location of previous training session output", + ) + parser.add_argument( + "-d", + "--device", + default="cuda", # TODO: default value doesn't work as arg is required + metavar="", + type=str, + required=True, + help="Device to perform requested session on 'cpu' or 'cuda'; for cuda, ensure CUDA_VISIBLE_DEVICES env var is set", + ) + parser.add_argument( + "-rt", + "--reset", + metavar="", + default=False, + type=ast.literal_eval, + help="Completely resets the previous run by deleting 'modeldir'", + ) + parser.add_argument( + "-rm", + "--resume", + metavar="", + default=False, + type=ast.literal_eval, + help="Resume previous training by only keeping model dict in 'modeldir'", + ) + parser.add_argument( + "-o", + "--outputdir", + "--output_path", + metavar="", + type=str, + help="Location to save the output of the inference session. 
Not used for training.", + ) + parser.add_argument( + "-v", + "--version", + action="version", + version="%(prog)s v{}".format(version) + "\n\n" + copyrightMessage, + help="Show program's version number and exit.", + ) + + # This is a dummy argument that exists to trigger MLCube mounting requirements. + # Do not remove. + parser.add_argument("-rawinput", "--rawinput", help=argparse.SUPPRESS) + + args = parser.parse_args() + + # config file should always be present + assert os.path.isfile(args.config), "Configuration file not found!" + + _run( + config=args.config, + input_data=args.inputdata, + train_flag=args.train, + model_dir=args.modeldir, + device=args.device, + reset_flag=args.reset, + resume_flag=args.resume, + output_path=args.outputdir, + ) + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/split_csv.py b/GANDLF/entrypoints/split_csv.py new file mode 100644 index 000000000..fa1cddf1d --- /dev/null +++ b/GANDLF/entrypoints/split_csv.py @@ -0,0 +1,116 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import os +import yaml +from typing import Optional + +import click +from deprecated.classic import deprecated + +from GANDLF.cli import copyrightMessage, split_data_and_save_csvs +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup + + +def _split_csv(input_csv: str, output_dir: str, config_path: Optional[str]): + input_csv = os.path.normpath(input_csv) + output_dir = os.path.normpath(output_dir) + # initialize default + config = yaml.safe_load(open(config_path, "r")) + + print("Config used for split:", config) + + split_data_and_save_csvs(input_csv, output_dir, config) + + print("Finished successfully.") + + +@click.command() +@click.option( + "--input-csv", + "-i", + required=True, + type=click.Path(exists=True, file_okay=True, dir_okay=False), + help="Input CSV file which contains the data to be split.", +) +@click.option( + "--output-dir", + "-o", + required=True, + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Output directory to save the split data.", +) +@click.option( + "--config", + "-c", + required=True, + help="The GaNDLF config (in YAML) with the `nested_training` key specified to the folds needed.", + type=click.Path(exists=True, file_okay=True, dir_okay=False), +) +@append_copyright_to_help +def new_way(input_csv: str, output_dir: str, config: Optional[str]): + """Split the data into training, validation, and testing sets and save them as csvs in the output directory.""" + _split_csv(input_csv, output_dir, config) + + +# old-fashioned way of running gandlf via `gandlf_splitCSV`. +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf split-csv` cli command " + + "instead of `gandlf_splitCSV`. Note that in new CLI tool some params were renamed:\n" + + " --inputCSV to --input-csv\n" + + " --outputDir to --output-dir\n" + + "`gandlf_splitCSV` script would be deprecated soon." 
+) +def old_way(): + logger_setup() + parser = argparse.ArgumentParser( + prog="GANDLF_SplitCSV", + formatter_class=argparse.RawTextHelpFormatter, + description="Split the data into training, validation, and testing sets and save them as csvs in the output directory.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-i", + "--inputCSV", + metavar="", + default=None, + type=str, + required=True, + help="Input CSV file which contains the data to be split.", + ) + parser.add_argument( + "-c", + "--config", + metavar="", + default=None, + required=True, + type=str, + help="The GaNDLF config (in YAML) with the `nested_training` key specified to the folds needed.", + ) + parser.add_argument( + "-o", + "--outputDir", + metavar="", + default=None, + type=str, + required=True, + help="Output directory to save the split data.", + ) + + args = parser.parse_args() + + # check for required parameters - this is needed here to keep the cli clean + for param_name in ["inputCSV", "outputDir", "config"]: + param_none_check = getattr(args, param_name) + assert param_none_check is not None, f"Missing required parameter: {param_name}" + + _split_csv( + input_csv=args.inputCSV, output_dir=args.outputDir, config_path=args.config + ) + + +# main function +if __name__ == "__main__": + old_way() diff --git a/GANDLF/entrypoints/subcommands.py b/GANDLF/entrypoints/subcommands.py new file mode 100644 index 000000000..814b00b66 --- /dev/null +++ b/GANDLF/entrypoints/subcommands.py @@ -0,0 +1,32 @@ +from GANDLF.entrypoints.anonymizer import new_way as anonymizer_command +from GANDLF.entrypoints.run import new_way as run_command +from GANDLF.entrypoints.construct_csv import new_way as construct_csv_command +from GANDLF.entrypoints.collect_stats import new_way as collect_stats_command +from GANDLF.entrypoints.patch_miner import new_way as patch_miner_command +from GANDLF.entrypoints.preprocess import new_way as preprocess_command +from GANDLF.entrypoints.verify_install import new_way as verify_install_command +from GANDLF.entrypoints.config_generator import new_way as config_generator_command +from GANDLF.entrypoints.recover_config import new_way as recover_config_command +from GANDLF.entrypoints.deploy import new_way as deploy_command +from GANDLF.entrypoints.optimize_model import new_way as optimize_model_command +from GANDLF.entrypoints.generate_metrics import new_way as generate_metrics_command +from GANDLF.entrypoints.debug_info import new_way as debug_info_command +from GANDLF.entrypoints.split_csv import new_way as split_csv_command + + +cli_subcommands = { + "anonymizer": anonymizer_command, + "run": run_command, + "construct-csv": construct_csv_command, + "collect-stats": collect_stats_command, + "patch-miner": patch_miner_command, + "preprocess": preprocess_command, + "config-generator": config_generator_command, + "verify-install": verify_install_command, + "recover-config": recover_config_command, + "deploy": deploy_command, + "optimize-model": optimize_model_command, + "generate-metrics": generate_metrics_command, + "debug-info": debug_info_command, + "split-csv": split_csv_command, +} diff --git a/GANDLF/entrypoints/verify_install.py b/GANDLF/entrypoints/verify_install.py new file mode 100644 index 000000000..0a94b1e08 --- /dev/null +++ b/GANDLF/entrypoints/verify_install.py @@ -0,0 +1,49 @@ +#!usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import click +from deprecated import deprecated + +from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import logger_setup 
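cli_tool.py (shown earlier) iterates over the `cli_subcommands` mapping from subcommands.py and attaches each entry to the `gandlf` group with `add_command`. A hedged sketch of the same wiring pattern with a hypothetical extra command, to show what registering a new entrypoint would involve:

import click

@click.group()
def gandlf():
    """Toy stand-in for the real `gandlf` group defined in cli_tool.py."""

@click.command()
def hello_command():
    """Hypothetical subcommand used only for illustration."""
    click.echo("hello from gandlf")

# Mirrors GANDLF/entrypoints/subcommands.py: command name -> click command object.
cli_subcommands = {
    "hello": hello_command,  # a real entry would import `new_way` from its entrypoint module
}

for command_name, command in cli_subcommands.items():
    gandlf.add_command(command, command_name)

if __name__ == "__main__":
    gandlf()  # `python thisfile.py hello` prints the greeting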
+ + +def _verify_install(): + try: + import GANDLF as gf + + print("GaNDLF installed version:", gf.__version__) + except Exception as e: + raise Exception( + "GaNDLF not properly installed, please see https://mlcommons.github.io/GaNDLF/setup" + ) from e + + print("GaNDLF is ready. See https://mlcommons.github.io/GaNDLF/usage") + + +@click.command() +@append_copyright_to_help +def new_way(): + """Verify GaNDLF installation.""" + _verify_install() + + +# main function +@deprecated( + "This is a deprecated way of running GanDLF. Please, use `gandlf verify-install` cli command " + + "instead of `gandlf_verifyInstall`.\n" + + "`gandlf_verifyInstall` script would be deprecated soon." +) +def old_way(): + logger_setup() + argparse.ArgumentParser( + prog="GANDLF_VerifyInstall", + formatter_class=argparse.RawTextHelpFormatter, + description="Verify GaNDLF installation.", + ) + _verify_install() + + +if __name__ == "__main__": + old_way() diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml new file mode 100644 index 000000000..d569aa90f --- /dev/null +++ b/GANDLF/logging_config.yaml @@ -0,0 +1,47 @@ +version: 1 +formatters: + detailed: + format: "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(funcName)s:%(lineno)d - %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" + log_colors: + DEBUG: "white" + INFO: "green" + WARNING: "yellow" + ERROR: "red" + CRITICAL: "bold_red" + simple: + (): colorlog.ColoredFormatter + format: "%(log_color)s%(asctime)s - %(levelname)s - %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" +filters: + warnings_filter: + (): logging.Filter + name: "py.warnings" + info_only_filter: + (): GANDLF.utils.gandlf_logging.InfoOnlyFilter +handlers: + stdoutHandler: # only display info level + class: logging.StreamHandler + level: INFO + formatter: simple + filters: [info_only_filter] + stream: ext://sys.stdout + stderrHandler: # display warning and above messages + class: logging.StreamHandler + level: WARNING + formatter: detailed + stream: ext://sys.stderr + rotatingFileHandler: + class: logging.handlers.RotatingFileHandler + level: DEBUG + formatter: detailed + maxBytes: 10485760 + backupCount: 2 +loggers: # you can add your customized logger + debug_logger: + level: DEBUG + handlers: [stdoutHandler, rotatingFileHandler, stderrHandler] + propagate: no +root: + level: DEBUG + handlers: [stdoutHandler, rotatingFileHandler, stderrHandler] diff --git a/GANDLF/metrics/classification.py b/GANDLF/metrics/classification.py index 5ef113fda..dd5936ed9 100644 --- a/GANDLF/metrics/classification.py +++ b/GANDLF/metrics/classification.py @@ -1,15 +1,13 @@ -from typing import Union - import torch import torchmetrics as tm -from torch.nn.functional import one_hot +import torch.nn.functional as F + +# from torch.nn.functional import one_hot from ..utils import get_output_from_calculator from GANDLF.utils.generic import determine_classification_task_type -def overall_stats( - prediction: torch.Tensor, target: torch.Tensor, params: dict -) -> dict[str, Union[float, list]]: +def overall_stats(prediction: torch.Tensor, target: torch.Tensor, params: dict) -> dict: """ Generates a dictionary of metrics calculated on the overall prediction and ground truths. @@ -25,6 +23,36 @@ def overall_stats( params["problem_type"] == "classification" ), "Only classification is supported for these stats" + def __convert_tensor_to_int(input_tensor: torch.Tensor) -> torch.Tensor: + """ + Convert the input tensor to integer format. + + Args: + input_tensor (torch.Tensor): The input tensor. 
+ + Returns: + torch.Tensor: The tensor converted to integer format. + """ + return_tensor = input_tensor.detach().clone() + if return_tensor.dtype != torch.long or return_tensor.dtype != torch.int: + return_tensor = return_tensor.long() + return return_tensor + + # this is needed for a few metrics + # ensure that predictions and target are in integer format + prediction_wrap = __convert_tensor_to_int(prediction) + target_wrap = __convert_tensor_to_int(target) + + # this is needed for auroc + # ensure that predictions are in integer format + prediction_wrap = prediction.detach().clone() + if prediction.dtype != torch.long or prediction.dtype != torch.int: + prediction_wrap = prediction_wrap.long() + predictions_one_hot = F.one_hot( + prediction_wrap, num_classes=params["model"]["num_classes"] + ) + predictions_prob = F.softmax(predictions_one_hot.float(), dim=1) + output_metrics = {} average_types_keys = { @@ -34,79 +62,63 @@ def overall_stats( "per_class_weighted": "weighted", } task = determine_classification_task_type(params) - # consider adding a "multilabel field in the future" - # metrics that need the "average" parameter + # todo: consider adding a "multilabel field in the future" - for average_type_key in average_types_keys.values(): + # metrics that need the "average" parameter + for average_type, average_type_key in average_types_keys.items(): # multidim_average is not used when constructing these metrics # think of having it calculators = { - "accuracy": tm.Accuracy( + f"accuracy_{average_type}": tm.Accuracy( task=task, num_classes=params["model"]["num_classes"], average=average_type_key, ), - "precision": tm.Precision( + f"precision_{average_type}": tm.Precision( task=task, num_classes=params["model"]["num_classes"], average=average_type_key, ), - "recall": tm.Recall( + f"recall_{average_type}": tm.Recall( task=task, num_classes=params["model"]["num_classes"], average=average_type_key, ), - "f1": tm.F1Score( + f"f1_{average_type}": tm.F1Score( task=task, num_classes=params["model"]["num_classes"], average=average_type_key, ), - "specificity": tm.Specificity( + f"specificity_{average_type}": tm.Specificity( task=task, num_classes=params["model"]["num_classes"], average=average_type_key, ), - # "aucroc": tm.AUROC( - # task=task, - # num_classes=params["model"]["num_classes"], - # average=average_type_key if average_type_key != "micro" else "macro", - # ), + f"auroc_{average_type}": tm.AUROC( + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key if average_type_key != "micro" else "macro", + ), } for metric_name, calculator in calculators.items(): - avg_typed_metric_name = f"{metric_name}_{average_type_key}" - if metric_name == "aucroc": - one_hot_preds = one_hot( - prediction.long(), num_classes=params["model"]["num_classes"] - ) - output_metrics[avg_typed_metric_name] = get_output_from_calculator( - one_hot_preds.float(), target, calculator + if "auroc" in metric_name: + output_metrics[metric_name] = get_output_from_calculator( + predictions_prob, target_wrap, calculator ) else: - output_metrics[avg_typed_metric_name] = get_output_from_calculator( + output_metrics[metric_name] = get_output_from_calculator( prediction, target, calculator ) - #### HERE WE NEED TO MODIFY TESTS - ROC IS RETURNING A TUPLE. WE MAY ALSO DISCARD IT #### - # what is AUC metric telling at all? 
Computing it for prediction and ground truth - # is not making sense - # metrics that do not have any "average" parameter - # calculators = { - # - # # "auc": tm.AUC(reorder=True), - # ## weird error for multi-class problem, where pos_label is not getting set - # "roc": tm.ROC(task=task, num_classes=params["model"]["num_classes"]), - # } - # for metric_name, calculator in calculators.items(): - # if metric_name == "roc": - # one_hot_preds = one_hot( - # prediction.long(), num_classes=params["model"]["num_classes"] - # ) - # output_metrics[metric_name] = get_output_from_calculator( - # one_hot_preds.float(), target, calculator - # ) - # else: - # output_metrics[metric_name] = get_output_from_calculator( - # prediction, target, calculator - # ) + # metrics that do not need the "average" parameter + calculators = { + "mcc": tm.MatthewsCorrCoef( + task=task, num_classes=params["model"]["num_classes"] + ) + } + for metric_name, calculator in calculators.items(): + output_metrics[metric_name] = get_output_from_calculator( + prediction, target, calculator + ) return output_metrics diff --git a/GANDLF/metrics/segmentation.py b/GANDLF/metrics/segmentation.py index 52d989dd7..82254079f 100644 --- a/GANDLF/metrics/segmentation.py +++ b/GANDLF/metrics/segmentation.py @@ -272,8 +272,9 @@ def get_sensitivity_and_specificity(result_array, target_array): if per_label: return torch.tensor(sensitivity_per_label), torch.tensor(specificity_per_label) else: - return torch.tensor(sensitivity / avg_counter), torch.tensor( - specificity / avg_counter + return ( + torch.tensor(sensitivity / avg_counter), + torch.tensor(specificity / avg_counter), ) diff --git a/GANDLF/models/Readme.md b/GANDLF/models/Readme.md index f64379cc1..8e69b225d 100644 --- a/GANDLF/models/Readme.md +++ b/GANDLF/models/Readme.md @@ -23,4 +23,5 @@ - All parameters should be taken as input, with special parameters (for e.g., `residualConnections` for `unet`) should not be exposed to the parameters dict, and should be handled separately via another class. - For example, `GANDLF.models.unet.unet` has a `residualConnections` parameter, which is not exposed to the parameters dict, and a separate class `GANDLF.models.unet.resunet` is defined which enables this flag. - Add the model's identifier to `GANDLF.models.__init__.global_model_dict` as appropriate. -- Call the new mode from the config using the `model` key. \ No newline at end of file +- Call the new mode from the config using the `model` key. 
+- To update the tests, append the new model key to either in `all_models_segmentation`, `all_models_segmentation` or `all_models_classification` in [test_full.py](https://github.com/mlcommons/GaNDLF/blob/master/testing/test_full.py) diff --git a/GANDLF/models/__init__.py b/GANDLF/models/__init__.py index a21c5849d..898e237c7 100644 --- a/GANDLF/models/__init__.py +++ b/GANDLF/models/__init__.py @@ -9,6 +9,7 @@ from .vgg import vgg11, vgg13, vgg16, vgg19 from .densenet import densenet121, densenet169, densenet201, densenet264 from .resnet import resnet18, resnet34, resnet50, resnet101, resnet152, resnet200 +from .dynunet_wrapper import dynunet_wrapper from .efficientnet import ( efficientnetB0, efficientnetB1, @@ -101,6 +102,7 @@ "efficientnetb5": efficientnetB5, "efficientnetb6": efficientnetB6, "efficientnetb7": efficientnetB7, + "dynunet": dynunet_wrapper, # Custom models "msdnet": MSDNet, "brain_age": brainage, diff --git a/GANDLF/models/brain_age.py b/GANDLF/models/brain_age.py index 6e1f344da..221d81f38 100644 --- a/GANDLF/models/brain_age.py +++ b/GANDLF/models/brain_age.py @@ -23,11 +23,8 @@ def brainage(parameters): parameters["model"]["dimension"] == 2 ), "Brain Age predictions only work on 2D data" - try: - # Load the pretrained VGG16 model - model = torchvision.models.vgg16(pretrained=True) - except Exception: - sys.exit("Error: Failed to load VGG16 model: " + traceback.format_exc()) + # Load the pretrained VGG16 model + model = torchvision.models.vgg16(pretrained=True) # Remove the final convolutional layer model.final_convolution_layer = None diff --git a/GANDLF/models/densenet.py b/GANDLF/models/densenet.py index b293f8830..62cc0ea34 100644 --- a/GANDLF/models/densenet.py +++ b/GANDLF/models/densenet.py @@ -185,8 +185,6 @@ def __init__(self, parameters: dict, block_config=(6, 12, 24, 16)): elif self.n_dimensions == 3: self.output_size = (1, 1, 1) self.conv_stride = (parameters["conv1_t_stride"], 2, 2) - else: - sys.exit("Only 2D or 3D convolutions are supported.") # First convolution self.features = [ diff --git a/GANDLF/models/dynunet_wrapper.py b/GANDLF/models/dynunet_wrapper.py new file mode 100644 index 000000000..3f209a3d8 --- /dev/null +++ b/GANDLF/models/dynunet_wrapper.py @@ -0,0 +1,88 @@ +from .modelBase import ModelBase +import monai.networks.nets.dynunet as dynunet + + +class dynunet_wrapper(ModelBase): + """ + More info: https://docs.monai.io/en/stable/networks.html#dynunet + + Args: + spatial_dims (int): number of spatial dimensions. + in_channels (int): number of input channels. + out_channels (int): number of output channels. + kernel_size (Sequence[Union[Sequence[int], int]]): convolution kernel size. + strides (Sequence[Union[Sequence[int], int]]): convolution strides for each blocks. + upsample_kernel_size (Sequence[Union[Sequence[int], int]]): convolution kernel size for transposed convolution layers. The values should equal to strides[1:]. + filters (Optional[Sequence[int]]): number of output channels for each blocks. Defaults to None. + dropout (Union[Tuple, str, float, None]): dropout ratio. Defaults to no dropout. + norm_name (Union[Tuple, str]): feature normalization type and arguments. Defaults to INSTANCE. + act_name (Union[Tuple, str]): activation layer type and arguments. Defaults to leakyrelu. + deep_supervision (bool): whether to add deep supervision head before output. Defaults to False. + deep_supr_num (int): number of feature maps that will output during deep supervision head. 
The value should be larger than 0 and less than the number of up sample layers. Defaults to 1. + res_block (bool): whether to use residual connection based convolution blocks during the network. Defaults to False. + trans_bias (bool): whether to set the bias parameter in transposed convolution layers. Defaults to False. + """ + + def __init__(self, parameters: dict): + super(dynunet_wrapper, self).__init__(parameters) + + # checking for validation + assert ( + "kernel_size" in parameters["model"] + ) == True, "\033[0;31m`kernel_size` key missing in parameters" + assert ( + "strides" in parameters["model"] + ) == True, "\033[0;31m`strides` key missing in parameters" + + # defining some defaults + # if not ("upsample_kernel_size" in parameters["model"]): + # parameters["model"]["upsample_kernel_size"] = parameters["model"][ + # "strides" + # ][1:] + + parameters["model"]["filters"] = parameters["model"].get("filters", None) + parameters["model"]["act_name"] = parameters["model"].get( + "act_name", ("leakyrelu", {"inplace": True, "negative_slope": 0.01}) + ) + + parameters["model"]["deep_supervision"] = parameters["model"].get( + "deep_supervision", True + ) + + parameters["model"]["deep_supr_num"] = parameters["model"].get( + "deep_supr_num", 1 + ) + + parameters["model"]["res_block"] = parameters["model"].get("res_block", True) + + parameters["model"]["trans_bias"] = parameters["model"].get("trans_bias", False) + parameters["model"]["dropout"] = parameters["model"].get("dropout", None) + + if not ("norm_type" in parameters["model"]): + self.norm_type = "INSTANCE" + + self.model = dynunet.DynUNet( + spatial_dims=self.n_dimensions, + in_channels=self.n_channels, + out_channels=self.n_classes, + kernel_size=parameters["model"]["kernel_size"], + strides=parameters["model"]["strides"], + upsample_kernel_size=parameters["model"]["strides"][1:], + filters=parameters["model"][ + "filters" + ], # number of output channels for each blocks + dropout=parameters["model"][ + "dropout" + ], # dropout ratio. Defaults to no dropout + norm_name=self.norm_type, + act_name=parameters["model"]["act_name"], + deep_supervision=parameters["model"]["deep_supervision"], + deep_supr_num=parameters["model"][ + "deep_supr_num" + ], # number of feature maps that will output during deep supervision head. + res_block=parameters["model"]["res_block"], + trans_bias=parameters["model"]["trans_bias"], + ) + + def forward(self, x): + return self.model.forward(x) diff --git a/GANDLF/models/efficientnet.py b/GANDLF/models/efficientnet.py index 57bba9dcb..02df3b316 100644 --- a/GANDLF/models/efficientnet.py +++ b/GANDLF/models/efficientnet.py @@ -394,8 +394,6 @@ def __init__(self, parameters: dict, scale_params): # how to scale depth and wi self.output_size = (1, 1) elif self.n_dimensions == 3: self.output_size = (1, 1, 1) - else: - sys.exit("Only 2D or 3D convolutions are supported.") if self.Norm is None: sys.stderr.write( "Warning: efficientnet is not defined without a normalization layer" diff --git a/GANDLF/models/modelBase.py b/GANDLF/models/modelBase.py index 226c6f911..3abe76cb7 100644 --- a/GANDLF/models/modelBase.py +++ b/GANDLF/models/modelBase.py @@ -53,6 +53,12 @@ def __init__(self, parameters): # based on dimensionality, the following need to defined: # convolution, batch_norm, instancenorm, dropout + assert self.n_dimensions in [ + 2, + 3, + ], "GaNDLF only supports 2D and 3D computations. 
{}D computations are not currently supported".format( + self.n_dimensions + ) if self.n_dimensions == 2: self.Conv = nn.Conv2d self.ConvTranspose = nn.ConvTranspose2d @@ -88,13 +94,6 @@ def __init__(self, parameters): elif converter_type == "conv3d": self.converter = Conv3dConverter - else: - raise ValueError( - "GaNDLF only supports 2D and 3D computations. {}D computations are not currently supported".format( - self.n_dimensions - ) - ) - def get_final_layer(self, final_convolution_layer: str) -> nn.Module: return get_modelbase_final_layer(final_convolution_layer) diff --git a/GANDLF/models/resnet.py b/GANDLF/models/resnet.py index e24dca221..a72e1eeff 100644 --- a/GANDLF/models/resnet.py +++ b/GANDLF/models/resnet.py @@ -1,4 +1,5 @@ import sys +import logging import torch.nn as nn import torch.nn.functional as F from collections import OrderedDict @@ -26,18 +27,13 @@ def __init__(self, parameters: dict, blockType, block_config): allowedLay = checkPatchDimensions(parameters["patch_size"], len(block_config)) # Display warning message if patch size is not large enough for desired number of layers + assert not ( + allowedLay != len(block_config) and allowedLay <= 0 + ), "The patch size is not large enough for the desired number of layers. It is expected that each dimension of the patch size is 2^(layers + 1)*i, where i is an integer greater than 2." if allowedLay != len(block_config) and allowedLay >= 1: - print( - "The patch size is not large enough for the desired number of layers.", - " It is expected that each dimension of the patch size is 2^(layers + 1)*i, where i is an integer greater than 2.", - "Only the first %d layers will run." % allowedLay, - ) - - # Raise an error if the patch size is too small - elif allowedLay != len(block_config) and allowedLay <= 0: - sys.exit( - "The patch size is not large enough for the desired number of layers.", - " It is expected that each dimension of the patch size is 2^(layers + 1)*i, where i is an integer greater than 2.", + logging.info( + "The patch size is not large enough for the desired number of layers. It is expected that each dimension of the patch size is 2^(layers + 1)*i, where i is an integer greater than 2. Only the first %d layers will run." + % allowedLay ) block_config = block_config[:allowedLay] @@ -51,8 +47,6 @@ def __init__(self, parameters: dict, blockType, block_config): self.output_size = (1, 1) elif self.n_dimensions == 3: self.output_size = (1, 1, 1) - else: - sys.exit("Only 2D or 3D convolutions are supported.") # If normalization layer is not defined, use Batch Normalization if self.Norm is None: diff --git a/GANDLF/optimizers/__init__.py b/GANDLF/optimizers/__init__.py index e952c4226..97de43fa1 100644 --- a/GANDLF/optimizers/__init__.py +++ b/GANDLF/optimizers/__init__.py @@ -10,6 +10,7 @@ adagrad, rmsprop, radam, + nadam, ) from .wrap_monai import novograd_wrapper @@ -27,6 +28,7 @@ "rmsprop": rmsprop, "radam": radam, "novograd": novograd_wrapper, + "nadam": nadam, } diff --git a/GANDLF/optimizers/wrap_torch.py b/GANDLF/optimizers/wrap_torch.py index 9a0455ec5..9852f7973 100644 --- a/GANDLF/optimizers/wrap_torch.py +++ b/GANDLF/optimizers/wrap_torch.py @@ -10,6 +10,7 @@ Adagrad, RMSprop, RAdam, + NAdam, ) @@ -245,3 +246,24 @@ def radam(parameters): weight_decay=parameters["optimizer"].get("weight_decay", 3e-05), foreach=parameters["optimizer"].get("foreach", None), ) + + +def nadam(parameters): + """ + Creates a NAdam optimizer from the PyTorch `torch.optim` module using the input parameters. 
+ + Args: + parameters (dict): A dictionary containing the input parameters for the optimizer. + + Returns: + optimizer (torch.optim.NAdam): A NAdam optimizer. + """ + # Create the optimizer using the input parameters + return NAdam( + parameters["model_parameters"], + lr=parameters.get("learning_rate"), + betas=parameters["optimizer"].get("betas", (0.9, 0.999)), + eps=parameters["optimizer"].get("eps", 1e-8), + weight_decay=parameters["optimizer"].get("weight_decay", 3e-05), + foreach=parameters["optimizer"].get("foreach", None), + ) diff --git a/GANDLF/utils/__init__.py b/GANDLF/utils/__init__.py index 66d830d3d..4c2233153 100644 --- a/GANDLF/utils/__init__.py +++ b/GANDLF/utils/__init__.py @@ -68,3 +68,4 @@ ) from .data_splitter import split_data +from .gandlf_logging import logger_setup, InfoOnlyFilter diff --git a/GANDLF/utils/gandlf_logging.py b/GANDLF/utils/gandlf_logging.py new file mode 100644 index 000000000..9c376aaf0 --- /dev/null +++ b/GANDLF/utils/gandlf_logging.py @@ -0,0 +1,61 @@ +import logging +import yaml +from pathlib import Path +from importlib import resources +import tempfile +from GANDLF.utils import get_unique_timestamp + + +def _create_tmp_log_file(): + tmp_dir = Path(tempfile.gettempdir()) + log_dir = Path.joinpath(tmp_dir, ".gandlf") + log_dir.mkdir(parents=True, exist_ok=True) + log_file = Path.joinpath(log_dir, get_unique_timestamp() + ".log") + return log_file + + +def _create_log_file(log_file): + log_file = Path(log_file) + log_file.write_text("Starting GaNDLF logging session \n") + + +def _configure_logging_with_logfile(log_file, config_path): + with resources.open_text("GANDLF", config_path) as file: + config_dict = yaml.safe_load(file) + config_dict["handlers"]["rotatingFileHandler"]["filename"] = str(log_file) + logging.config.dictConfig(config_dict) + + +def logger_setup(log_file=None, config_path="logging_config.yaml") -> None: + """ + It sets up the logger. Reads from logging_config. + + Args: + log_file (str): dir path for saving the logs, defaults to `None`, at which time logs are flushed to console. + config_path (str): file path for the configuration + + """ + + logging.captureWarnings(True) + log_tmp_file = log_file + if log_file is None: # create tmp file + log_tmp_file = _create_tmp_log_file() + logging.info(f"The logs are saved in {log_tmp_file}") + _create_log_file(log_tmp_file) + _configure_logging_with_logfile(log_tmp_file, config_path) + + +class InfoOnlyFilter(logging.Filter): + """ + Display only INFO messages. + """ + + def filter(self, record): + """ + Determines if the specified record is to be logged. + Args: + record (logging.LogRecord): The log record to be evaluated. + Returns: + bool: True if the log record should be processed, False otherwise. + """ + return record.levelno == logging.INFO diff --git a/GANDLF/utils/modelio.py b/GANDLF/utils/modelio.py index a83a6d952..d9c069804 100644 --- a/GANDLF/utils/modelio.py +++ b/GANDLF/utils/modelio.py @@ -26,7 +26,10 @@ def optimize_and_save_model( - model: torch.nn.Module, params: dict, path: str, onnx_export: Optional[bool] = True + model: torch.nn.Module, + params: dict, + output_path: str, + onnx_export: Optional[bool] = True, ) -> None: """ Perform post-training optimization and save it to a file. @@ -34,7 +37,7 @@ def optimize_and_save_model( Args: model (torch.nn.Module): Trained torch model. params (dict): The parameter dictionary. - path (str): The path to save the model dictionary to. + output_path (str): The path to save the optimized model to. 
onnx_export (Optional[bool]): Whether to export to ONNX and OpenVINO. Defaults to True. """ # Check if ONNX export is enabled in the parameter dictionary @@ -59,9 +62,7 @@ def optimize_and_save_model( num_channel = params["model"]["num_channels"] model_dimension = params["model"]["dimension"] input_shape = params["patch_size"] - onnx_path = path - if not onnx_path.endswith(".onnx"): - onnx_path = onnx_path.replace("pth.tar", "onnx") + onnx_path = output_path.replace(".pth.tar", ".onnx") if model_dimension == 2: dummy_input = torch.randn( diff --git a/GANDLF/version.py b/GANDLF/version.py index e4e2ea407..9d601767a 100644 --- a/GANDLF/version.py +++ b/GANDLF/version.py @@ -2,4 +2,4 @@ # -*- coding: UTF-8 -*- # check GaNDLF wiki for versioning and release guidelines: https://github.com/mlcommons/GaNDLF/wiki -__version__ = "0.0.20" +__version__ = "0.1.0-dev" diff --git a/MANIFEST.in b/MANIFEST.in index b99b29c99..7c33dccc4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,4 +4,4 @@ exclude *.toml include setup.py include .dockerignore include Dockerfile-* -include gandlf_* \ No newline at end of file +include logging_config.yml diff --git a/docs/extending.md b/docs/extending.md index 2677f8c04..69ba06268 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -6,7 +6,7 @@ Before starting to work on the code-level on GaNDLF, please follow the [instruct # continue from previous shell (venv_gandlf) $> # you should be in the "GaNDLF" git repo -(venv_gandlf) $> python ./gandlf_verifyInstall +(venv_gandlf) $> gandlf verify-install ``` @@ -17,7 +17,7 @@ Before starting to work on the code-level on GaNDLF, please follow the [instruct ## Overall Architecture -- Command-line parsing: [gandlf_run](https://github.com/mlcommons/GaNDLF/blob/master/gandlf_run) +- Command-line parsing: [gandlf run](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/entrypoints/run.py) - Parameters from [training configuration](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_all_options.yaml) get passed as a `dict` via the [config manager](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/config_manager.py) - [Training Manager](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/training_manager.py): - Handles k-fold training @@ -77,6 +77,11 @@ To update/change/add a dependency in [setup](https://github.com/mlcommons/GaNDLF - Update [Inference Manager](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/inference_manager.py), if any inference API has changed - [Update Tests](#update-tests) +## Adding new CLI command +Example: `gandlf config-generator` [CLI command](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/entrypoints/config_generator.py) +- Implement function and wrap it with `@click.command()` + `@click.option()` +- Add it to `cli_subommands` [dict](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/entrypoints/subcommands.py) +The command would be available under `gandlf your-subcommand-name` CLI command. ## Update parameters @@ -106,7 +111,7 @@ Once you have the virtual environment set up, tests can be run using the followi (venv_gandlf) $> pytest --device cuda # can be cuda or cpu, defaults to cpu ``` -Any failures will be reported in the file [`${GaNDLF_HOME}/testing/failures.log`](https://github.com/mlcommons/GaNDLF/blob/5030ff83a38947c1583b58a08598308886ee9a0a/testing/conftest.py#L25). 
+Any failures will be reported in the file [`${GANDLF_HOME}/testing/failures.log`](https://github.com/mlcommons/GaNDLF/blob/5030ff83a38947c1583b58a08598308886ee9a0a/testing/conftest.py#L25).

### Integration tests

@@ -127,4 +132,43 @@ bash
# continue from previous shell
(venv_gandlf) $> coverage run -m pytest --device cuda; coverage report -m
```
+## Logging
+
+### Use loggers instead of print
+We use the native `logging` [library](https://docs.python.org/3/library/logging.html) for log management. This gets configured automatically when GaNDLF is launched. So, if you are extending the code, please use loggers instead of prints.
+
+Here is an example of how a logger can be used:
+
+```
+def my_new_cool_function(df: pd.DataFrame):
+    logger = logging.getLogger(__name__)  # you can use any logger name of your own, or just pass the current file name
+    logger.debug("Message for debug file only")
+    logger.info("Hi GaNDLF user, I greet you in the CLI output")
+    logger.error(f"A detailed message about any error if needed. Exception: {str(e)}, params: {params}, df shape: {df.shape}")
+    # print("Hi GaNDLF user!") # don't use prints please.
+```
+
+### What and where is logged
+
+GaNDLF logs are split into multiple parts:
+- CLI output: only `info` messages are shown here
+- debug file: all messages are shown
+- stderr: displays `warning`, `error`, or `critical` messages
+
+By default, the logs are flushed to the console.
+The logs are **saved** in the path that is defined by the `--log-file` parameter in the CLI commands.
+If the path is not provided or an error is raised, the logs will be flushed to the console.
+
+Example of a log message:
+```
+#format: "%(asctime)s - %(name)s - %(levelname)s - %(pathname)s:%(lineno)d - %(message)s"
+2024-07-03 13:05:51,642 - root - DEBUG - GaNDLF/GANDLF/entrypoints/anonymizer.py:28 - input_dir='.'
+```
+
+### Create your own logger
+You can create and configure your own logger by updating the file `GANDLF/logging_config.yaml`.

diff --git a/docs/faq.md b/docs/faq.md
index d64b420db..b0a89cf9c 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -11,7 +11,7 @@ This means that GaNDLF was not installed correctly. Please ensure you have follo
### Why is GaNDLF not working?
-Verify that [the installation](https://mlcommons.github.io/GaNDLF/setup) has been done correctly by running `python ./gandlf_verifyInstall` after activating the correct virtual environment. If you are still having issues, please feel free to [post a support request](https://github.com/mlcommons/GaNDLF/issues/new?assignees=&labels=&template=--questions-help-support.md&title=), and we will do our best to address it ASAP.
+Verify that [the installation](https://mlcommons.github.io/GaNDLF/setup) has been done correctly by running `gandlf verify-install` after activating the correct virtual environment. If you are still having issues, please feel free to [post a support request](https://github.com/mlcommons/GaNDLF/issues/new?assignees=&labels=&template=--questions-help-support.md&title=), and we will do our best to address it ASAP.
### Which parts of a GaNDLF configuration are customizable?
@@ -32,8 +32,9 @@ If you have `data_preprocessing` enabled, GaNDLF will load all of the resized im
### How can I resume training from a previous checkpoint?
GaNDLF allows you to resume training from a previous checkpoint in 2 ways:
-- By using the `--resume` CLI parameter in `gandlf_run`, only the model weights and state dictionary will be preserved, but parameters and data are taken from the new options in the CLI.
This is helpful when you are updated the training data or **some** compatible options in the parameters. -- If both `--resume` and `--reset` are `False` in `gandlf_run`, the model weights, state dictionary, and all previously saved information (parameters, training/validation/testing data) is used to resume training. +- By using the `--resume` CLI parameter in `gandlf run`, only the model weights and state dictionary will be preserved, but parameters and data are taken from the new options in the CLI. This is helpful when you are updated the training data or **some** compatible options in the parameters. + +- If both `--resume` and `--reset` are skipped in `gandlf run`, the model weights, state dictionary, and all previously saved information (parameters, training/validation/testing data) is used to resume training. ### How can I update GaNDLF? diff --git a/docs/setup.md b/docs/setup.md index 131bd812b..34dc2e330 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -30,13 +30,13 @@ GaNDLF's primary computational foundation is built on PyTorch, and as such it su (venv_gandlf) $> ### subsequent commands go here ### PyTorch installation - https://pytorch.org/get-started/previous-versions/#v210 ## CUDA 12.1 -# (venv_gandlf) $> pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 +# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121 ## CUDA 11.8 -# (venv_gandlf) $> pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 -## ROCm 5.6 -# (venv_gandlf) $> pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/rocm5.6 +# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118 +## ROCm 5.7 +# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/rocm5.7 ## CPU-only -# (venv_gandlf) $> pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu +# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu ``` ### Optional Dependencies diff --git a/docs/usage.md b/docs/usage.md index 2dcb79a58..0064e13b5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -24,13 +24,12 @@ Please follow the [installation instructions](./setup.md#installation) to instal ### Anonymize Data -A major reason why one would want to anonymize data is to ensure that trained models do not inadvertently do not encode protect health information [[1](https://doi.org/10.1145/3436755),[2](https://doi.org/10.1038/s42256-020-0186-1)]. GaNDLF can anonymize single images or a collection of images using the `gandlf_anonymizer` script. It can be used as follows: +A major reason why one would want to anonymize data is to ensure that trained models do not inadvertently do not encode protect health information [[1](https://doi.org/10.1145/3436755),[2](https://doi.org/10.1038/s42256-020-0186-1)]. GaNDLF can anonymize single images or a collection of images using the `gandlf anonymizer` command. It can be used as follows: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_anonymizer +(venv_gandlf) $> gandlf anonymizer # -h, --help Show help message and exit - # -v, --version Show program's version number and exit. 
-c ./samples/config_anonymizer.yaml \ # anonymizer configuration - needs to be a valid YAML (check syntax using https://yamlchecker.com/) -i ./input_dir_or_file \ # input directory containing series of images to anonymize or a single image -o ./output_dir_or_file # output directory to save anonymized images or a single output image file (for a DICOM to NIfTi conversion specify a .nii.gz file) @@ -73,7 +72,7 @@ Once these files are present, the patch miner can be run using the following com ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_patchMiner \ +(venv_gandlf) $> gandlf patch-miner \ # -h, --help Show help message and exit -c ./exp_patchMiner/config.yaml \ # patch extraction configuration - needs to be a valid YAML (check syntax using https://yamlchecker.com/) -i ./exp_patchMiner/input.csv \ # data in CSV format @@ -86,7 +85,7 @@ Running preprocessing before training/inference is optional, but recommended. It ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_preprocess \ +(venv_gandlf) $> gandlf preprocess \ # -h, --help Show help message and exit -c ./experiment_0/model.yaml \ # model configuration - needs to be a valid YAML (check syntax using https://yamlchecker.com/) -i ./experiment_0/train.csv \ # data in CSV format @@ -115,9 +114,9 @@ N,/full/path/N/0.nii.gz,/full/path/N/1.nii.gz,...,/full/path/N/X.nii.gz,/full/pa - Multiple segmentation classes should be in a single file with unique label numbers. - Multi-label classification/regression is currently not supported. -### Using the `gandlf_constructCSV` application +### Using the `gandlf construct-csv` command -To make the process of creating the CSV easier, we have provided a utility application called `gandlf_constructCSV`. This script works when the data is arranged in the following format (example shown of the data directory arrangement from the [Brain Tumor Segmentation (BraTS) Challenge](https://www.synapse.org/brats)): +To make the process of creating the CSV easier, we have provided a `gandlf construct-csv` command. This script works when the data is arranged in the following format (example shown of the data directory arrangement from the [Brain Tumor Segmentation (BraTS) Challenge](https://www.synapse.org/brats)): ```bash $DATA_DIRECTORY @@ -150,7 +149,7 @@ The following command shows how the script works: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_constructCSV \ +(venv_gandlf) $> gandlf construct-csv \ # -h, --help Show help message and exit -i $DATA_DIRECTORY # this is the main data directory -c _t1.nii.gz,_t1ce.nii.gz,_t2.nii.gz,_flair.nii.gz \ # an example image identifier for 4 structural brain MR sequences for BraTS, and can be changed based on your data @@ -164,15 +163,15 @@ The following command shows how the script works: - `SubjectID` or `PatientName` is used to ensure that the randomized split is done per-subject rather than per-image. - For data arrangement different to what is described above, a customized script will need to be written to generate the CSV, or you can enter the data manually into the CSV. -### Using the `gandlf_splitCSV` application +### Using the `gandlf split-csv` command -To split the data CSV into training, validation, and testing CSVs, the `gandlf_splitCSV` script can be used. The following command shows how the script works: +To split the data CSV into training, validation, and testing CSVs, the `gandlf split-csv` script can be used. 
The following command shows how the script works: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_splitCSV \ +(venv_gandlf) $> gandlf split-csv \ # -h, --help Show help message and exit - -i ./experiment_0/train_data.csv \ # output CSV from the `gandlf_constructCSV` script + -i ./experiment_0/train_data.csv \ # output CSV from the `gandlf construct-csv` script -c $gandlf_config \ # the GaNDLF config (in YAML) with the `nested_training` key specified to the folds needed -o $output_dir # the output directory to save the split data ``` @@ -194,14 +193,14 @@ GaNDLF requires a YAML-based configuration that controls various aspects of the ### Running multiple experiments (optional) -1. The `gandlf_configGenerator` script can be used to generate a grid of configurations for tuning the hyperparameters of a baseline configuration that works for your dataset and problem. +1. The `gandlf config-generator` command can be used to generate a grid of configurations for tuning the hyperparameters of a baseline configuration that works for your dataset and problem. 2. Use a strategy file (example is shown in [samples/config_generator_strategy.yaml](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_generator_sample_strategy.yaml). 3. Provide the baseline configuration which has enabled you to successfully train a model for `1` epoch for your dataset and problem at hand (regardless of the efficacy). 4. Run the following command: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_configGenerator \ +(venv_gandlf) $> gandlf config-generator \ # -h, --help Show help message and exit -c ./samples/config_all_options.yaml \ # baseline configuration -s ./samples/config_generator_strategy.yaml \ # strategy file @@ -223,16 +222,16 @@ You can use the following code snippet to run GaNDLF: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_run \ +(venv_gandlf) $> gandlf run \ # -h, --help Show help message and exit # -v, --version Show program's version number and exit. -c ./experiment_0/model.yaml \ # model configuration - needs to be a valid YAML (check syntax using https://yamlchecker.com/) -i ./experiment_0/train.csv \ # data in CSV format - -m ./experiment_0/model_dir/ \ # model directory (i.e., the `modeldir`) where the output of the training will be stored, created if not present - -t True \ # True == train, False == inference + -m ./experiment_0/model_dir/ \ # model directory (i.e., the `model-dir`) where the output of the training will be stored, created if not present + --train \ # --train/-t or --infer -d cuda # ensure CUDA_VISIBLE_DEVICES env variable is set for GPU device, use 'cpu' for CPU workloads - # -rt , --reset # [optional] completely resets the previous run by deleting `modeldir` - # -rm , --resume # [optional] resume previous training by only keeping model dict in `modeldir` + # -rt , --reset # [optional] completely resets the previous run by deleting `model-dir` + # -rm , --resume # [optional] resume previous training by only keeping model dict in `model-dir` ``` ### Special notes for Inference for Histology images @@ -247,12 +246,12 @@ GaNDLF provides a script to generate metrics after an inference process is done. ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_generateMetrics \ +(venv_gandlf) $> gandlf generate-metrics \ # -h, --help Show help message and exit # -v, --version Show program's version number and exit. 
-c , --config The configuration file (contains all the information related to the training/inference session) - -i , --inputdata CSV file that is used to generate the metrics; should contain 3 columns: 'SubjectID,Target,Prediction' - -o , --outputfile Location to save the output dictionary. If not provided, will print to stdout. + -i , --input-data CSV file that is used to generate the metrics; should contain 3 columns: 'SubjectID,Target,Prediction' + -o , --output-file Location to save the output dictionary. If not provided, will print to stdout. ``` Once you have your CSV in the specific format, you can pass it on to generate the metrics. Here is an example for segmentation: @@ -264,7 +263,7 @@ SubjectID,Target,Prediction ... ``` -Similarly for classification or regression (`A`, `B`, `C`, `D` are integers for classification and floats for regression): +Similarly, for classification or regression (`A`, `B`, `C`, `D` are integers for classification and floats for regression): ```csv SubjectID,Target,Prediction @@ -287,7 +286,7 @@ SubjectID,Target,Prediction,Mask ### Multi-GPU training -GaNDLF enables relatively straightforward multi-GPU training. Simply set the `CUDA_VISIBLE_DEVICES` environment variable to the list of GPUs you want to use, and pass `cuda` as the device to the `gandlf_run` script. For example, if you want to use GPUs 0, 1, and 2, you would set `CUDA_VISIBLE_DEVICES=0,1,2` [[ref](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/)] and pass `-d cuda` to the `gandlf_run` script. +GaNDLF enables relatively straightforward multi-GPU training. Simply set the `CUDA_VISIBLE_DEVICES` environment variable to the list of GPUs you want to use, and pass `cuda` as the device to the `gandlf run` command. For example, if you want to use GPUs 0, 1, and 2, you would set `CUDA_VISIBLE_DEVICES=0,1,2` [[ref](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/)] and pass `-d cuda` to the `gandlf run` command. ### Distributed training @@ -316,18 +315,18 @@ ${architecture_name}_initial.{onnx/xml/bin} # [optional] if ${architecture_name} ### Inference - The output of inference will be predictions based on the model that was trained. -- The predictions will be saved in the same directory as the model if `outputdir` is not passed to `gandlf_run`. +- The predictions will be saved in the same directory as the model if `output-dir` is not passed to `gandlf run`. - For segmentation, a directory will be created per subject ID in the input CSV. -- For classification/regression, the predictions will be generated in the `outputdir` or `modeldir` as a CSV file. +- For classification/regression, the predictions will be generated in the `output-dir` or `model-dir` as a CSV file. ## Plot the final results -After the testing/validation training is finished, GaNDLF enables the collection of all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/mlcommons/GaNDLF/blob/master/gandlf_collectStats) can be used for plotting: +After the testing/validation training is finished, GaNDLF enables the collection of all the statistics from the final models for testing and validation datasets and plot them. 
The [gandlf collect-stats](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/entrypoints/collect_stats.py) command can be used for plotting: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_collectStats \ +(venv_gandlf) $> gandlf collect-stats \ -m /path/to/trained/models \ # directory which contains testing and validation models -o ./experiment_0/output_dir_stats/ # output directory to save stats and plot ``` @@ -360,11 +359,11 @@ All generated attention maps can be found in the experiment's output directory. ## Post-Training Model Optimization -If you have a model previously trained using GaNDLF that you wish to run graph optimizations on, you can use the `gandlf_optimize` script to do so. The following command shows how it works: +If you have a model previously trained using GaNDLF that you wish to run graph optimizations on, you can use the `gandlf optimize-model` command to do so. The following command shows how it works: ```bash # continue from previous shell -(venv_gandlf) $> python gandlf_optimizeModel \ +(venv_gandlf) $> gandlf optimize-model \ -m /path/to/trained/${architecture_name}_best.pth.tar # directory which contains testing and validation models ``` @@ -379,13 +378,13 @@ GaNDLF provides the ability to deploy models into easy-to-share, easy-to-use for The resulting image contains your specific version of GaNDLF (including any custom changes you have made) and your trained model and configuration. This ensures that upstream changes to GaNDLF will not break compatibility with your model. -To deploy a model, simply run the `gandlf_deploy` command after training a model. You will need the [Docker engine](https://www.docker.com/get-started/) installed to build Docker images. This will create the image and, for MLCubes, generate an MLCube directory complete with an `mlcube.yaml` specifications file, along with the workspace directory copied from a pre-existing template. +To deploy a model, simply run the `gandlf deploy` command after training a model. You will need the [Docker engine](https://www.docker.com/get-started/) installed to build Docker images. This will create the image and, for MLCubes, generate an MLCube directory complete with an `mlcube.yaml` specifications file, along with the workspace directory copied from a pre-existing template. 
```bash # continue from previous shell -(venv_gandlf) $> python gandlf_deploy \ +(venv_gandlf) $> gandlf deploy \ # -h, --help Show help message and exit - -c ./experiment_0/model.yaml \ # Configuration to bundle with the model (you can recover it with gandlf_recoverConfig first if needed) + -c ./experiment_0/model.yaml \ # Configuration to bundle with the model (you can recover it with `gandlf recover-config` first if needed) -m ./experiment_0/model_dir/ \ # model directory (i.e., modeldir) --target docker \ # the target platform (--help will show all available targets) --mlcube-root ./my_new_mlcube_dir \ # Directory containing mlcube.yaml (used to configure your image base) @@ -398,7 +397,7 @@ To deploy a model, simply run the `gandlf_deploy` command after training a model You can also deploy GaNDLF as a metrics generator (see the [Generate Metrics](#generate-metrics) section) as follows: ```bash -(venv_gandlf) $> python gandlf_deploy \ +(venv_gandlf) $> gandlf deploy \ ## -h, --help show help message and exit --target docker \ # the target platform (--help will show all available targets) --mlcube-root ./my_new_mlcube_dir \ # Directory containing mlcube.yaml (used to configure your image base) @@ -454,18 +453,18 @@ For example, you might run: (main) $> docker run -it --rm --name gandlf --volume /home/researcher/gandlf_input:/input:ro --volume /home/researcher/gandlf_output:/output cbica/gandlf:latest-cpu [command and args go here] ``` -Remember that the process running in the container only considers the filesystem inside the container, which is structured differently from that of your host machine. Therefore, you will need to give paths relative to the mount point *destination*. Additionally, any paths used internally by GaNDLF will refer to locations inside the container. This means that data CSVs produced by the `gandlf_constructCSV` script will need to be made from the container and with input in the same locations. Expanding on our last example: +Remember that the process running in the container only considers the filesystem inside the container, which is structured differently from that of your host machine. Therefore, you will need to give paths relative to the mount point *destination*. Additionally, any paths used internally by GaNDLF will refer to locations inside the container. This means that data CSVs produced by the `gandlf construct-csv` command will need to be made from the container and with input in the same locations. Expanding on our last example: ```bash (main) $> docker run -it --rm --name dataprep \ --volume /home/researcher/gandlf_input:/input:ro \ # input data is mounted as read-only --volume /home/researcher/gandlf_output:/output \ # output data is mounted as read-write cbica/gandlf:latest-cpu \ # change to appropriate docker image tag - gandlf_constructCSV \ # standard construct CSV API starts - --inputDir /input/data \ - --outputFile /output/data.csv \ - --channelsID _t1.nii.gz \ - --labelID _seg.nii.gz + construct-csv \ # standard construct CSV API starts + --input-dir /input/data \ + --output-file /output/data.csv \ + --channels-id _t1.nii.gz \ + --label-id _seg.nii.gz ``` The previous command will generate a data CSV file that you can safely edit outside the container (such as by adding a `ValueToPredict` column). 
Then, you can refer to the same file when running again: @@ -475,18 +474,18 @@ The previous command will generate a data CSV file that you can safely edit outs --volume /home/researcher/gandlf_input:/input:ro \ # input data is mounted as read-only --volume /home/researcher/gandlf_output:/output \ # output data is mounted as read-write cbica/gandlf:latest-cpu \ # change to appropriate docker image tag - gandlf_run --train True \ # standard training API starts + gandlf run --train \ # standard training API starts --config /input/config.yml \ --inputdata /output/data.csv \ --modeldir /output/model ``` #### Special Case for Training -Considering that you want to train on an existing model that is inside the GaNDLF container (such as in an MLCube container created by `gandlf_deploy`), the output will be to a location embedded inside the container. Since you cannot mount something into that spot without overwriting the model, you can instead use the built-in `docker cp` command to extract the model afterward. For example, you can fine-tune a model on your own data using the following commands as a starting point: +Considering that you want to train on an existing model that is inside the GaNDLF container (such as in an MLCube container created by `gandlf deploy`), the output will be to a location embedded inside the container. Since you cannot mount something into that spot without overwriting the model, you can instead use the built-in `docker cp` command to extract the model afterward. For example, you can fine-tune a model on your own data using the following commands as a starting point: ```bash # Run training on your new data -(main) $> docker run --name gandlf_training mlcommons/gandlf-pretrained:0.0.1 -v /my/input/data:/input gandlf_run -m /embedded_model/ [...] # Do not include "--rm" option! +(main) $> docker run --name gandlf_training mlcommons/gandlf-pretrained:0.0.1 -v /my/input/data:/input gandlf run -m /embedded_model/ [...] # Do not include "--rm" option! # Copy the finetuned model out of the container, to a location on the host (main) $> docker cp gandlf_training:/embedded_model /home/researcher/extracted_model # Now you can remove the container to clean up @@ -501,7 +500,7 @@ If using CUDA, GaNDLF also expects the environment variable `CUDA_VISIBLE_DEVICE For example: ```bash -(main) $> docker run --gpus all -e CUDA_VISIBLE_DEVICES -it --rm --name gandlf cbica/gandlf:latest-cuda113 gandlf_run --device cuda [...] +(main) $> docker run --gpus all -e CUDA_VISIBLE_DEVICES -it --rm --name gandlf cbica/gandlf:latest-cuda113 gandlf run --device cuda [...] ``` This can be replicated for ROCm for AMD , by following the [instructions to set up the ROCm Container Toolkit](https://rocmdocs.amd.com/en/latest/ROCm_Virtualization_Containers/ROCm-Virtualization-&-Containers.html?highlight=docker). 
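The remaining hunks below delete the standalone `gandlf_*` scripts; their functionality now lives in the click commands collected in `GANDLF/entrypoints/subcommands.py` (added earlier in this diff). The actual `gandlf` console-script entrypoint is not shown in this section, so the group name and wiring below are assumptions; this is only a minimal sketch of how the `cli_subcommands` registry could be attached to a single click group:

```python
# Hypothetical sketch (not part of this diff): exposing the cli_subcommands
# registry as one `gandlf` click group. The group/function names here are
# illustrative assumptions; only `cli_subcommands` and `version` come from GaNDLF.
import click

from GANDLF import version
from GANDLF.entrypoints.subcommands import cli_subcommands


@click.group()
@click.version_option(version, "--version", "-v", message="GaNDLF v%(version)s")
def gandlf_cli():
    """GaNDLF command-line interface."""


# Each entry maps a CLI name (e.g. "split-csv") to a click command, so
# `gandlf split-csv ...` dispatches to GANDLF.entrypoints.split_csv.new_way.
for name, command in cli_subcommands.items():
    gandlf_cli.add_command(command, name=name)


if __name__ == "__main__":
    gandlf_cli()
```

With a registry like this, adding a new subcommand (as described in the `docs/extending.md` changes above) only requires adding one entry to the `cli_subcommands` dict.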
diff --git a/gandlf_anonymizer b/gandlf_anonymizer deleted file mode 100644 index 2d6141c89..000000000 --- a/gandlf_anonymizer +++ /dev/null @@ -1,68 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import os, argparse, sys, yaml -from GANDLF.anonymize import run_anonymizer -from GANDLF.cli import copyrightMessage - - -def main(): - parser = argparse.ArgumentParser( - prog="GANDLF_Anonymize", - formatter_class=argparse.RawTextHelpFormatter, - description="Anonymize images/scans in the data directory.\n\n" - + copyrightMessage, - ) - parser.add_argument( - "-i", - "--inputDir", - metavar="", - type=str, - help="Input directory or file which contains images to be anonymized.", - ) - parser.add_argument( - "-c", - "--config", - metavar="", - default="", - type=str, - help="config (in YAML) for running anonymization, optionally, specify modality using '-m' for defaults.", - ) - parser.add_argument( - "-m", - "--modality", - metavar="", - default="rad", - type=str, - help="The modality type, can be 'rad' or 'histo'.", - ) - parser.add_argument( - "-o", - "--outputFile", - metavar="", - type=str, - help="Output directory or file which will contain the image(s) after anonymization.", - ) - - args = parser.parse_args() - - # check for required parameters - this is needed here to keep the cli clean - for param_none_check in [args.inputDir, args.outputFile]: - if param_none_check is None: - sys.exit("ERROR: Missing required parameter:", param_none_check) - - inputDir = os.path.normpath(args.inputDir) - outputFile = os.path.normpath(args.outputFile) - if os.path.isfile(args.config): - config = yaml.safe_load(open(args.config, "r")) - else: - config = None - - run_anonymizer(inputDir, outputFile, config, args.modality) - - print("Finished successfully.") - - -# main function -if __name__ == "__main__": - main() diff --git a/gandlf_configGenerator b/gandlf_configGenerator deleted file mode 100644 index 26ddca70b..000000000 --- a/gandlf_configGenerator +++ /dev/null @@ -1,42 +0,0 @@ -import argparse -from GANDLF.cli import config_generator, copyrightMessage - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_ConfigGenerator", - formatter_class=argparse.RawTextHelpFormatter, - description="Generate multiple GaNDLF configurations based on a single baseline GaNDLF for experimentation.\n\n" - + copyrightMessage, - ) - - parser.add_argument( - "-c", - "--config", - metavar="", - type=str, - help="Path to base config.", - required=True, - ) - parser.add_argument( - "-s", - "--strategy", - metavar="", - type=str, - help="Config creation strategy in a yaml format.", - required=True, - ) - parser.add_argument( - "-o", - "--output", - metavar="", - type=str, - help="Path to output directory.", - required=True, - ) - - args = parser.parse_args() - - config_generator(args.config, args.strategy, args.output) - - print("Finished.") diff --git a/gandlf_constructCSV b/gandlf_constructCSV deleted file mode 100644 index a61f322ea..000000000 --- a/gandlf_constructCSV +++ /dev/null @@ -1,94 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import os, argparse, sys, ast -from datetime import date -from GANDLF.utils import writeTrainingCSV - -from GANDLF.cli import copyrightMessage - -import yaml - - -def main(): - parser = argparse.ArgumentParser( - prog="GANDLF_ConstructCSV", - formatter_class=argparse.RawTextHelpFormatter, - description="Generate training/inference CSV from data directory.\n\n" - + copyrightMessage, - ) - parser.add_argument( - "-i", - "--inputDir", - 
metavar="", - type=str, - help="Input data directory which contains images in specified format", - ) - parser.add_argument( - "-c", - "--channelsID", - metavar="", - type=str, - help="Channels/modalities identifier string to check for in all files in 'input_dir'; for example: --channelsID _t1.nii.gz,_t2.nii.gz", - ) - parser.add_argument( - "-l", - "--labelID", - default=None, - type=str, - help="Label/mask identifier string to check for in all files in 'input_dir'; for example: --labelID _seg.nii.gz", - ) - parser.add_argument( - "-o", - "--outputFile", - metavar="", - type=str, - help="Output CSV file", - ) - parser.add_argument( - "-r", - "--relativizePaths", - metavar="", - type=ast.literal_eval, - default=False, - help="If True, paths in the output data CSV will always be relative to the location of the output data CSV itself.", - ) - - args = parser.parse_args() - - # check for required parameters - this is needed here to keep the cli clean - for param_none_check in [ - args.inputDir, - args.channelsID, - args.outputFile, - ]: - if param_none_check is None: - sys.exit("ERROR: Missing required parameter:", param_none_check) - - inputDir = os.path.normpath(args.inputDir) - outputFile = os.path.normpath(args.outputFile) - channelsID = args.channelsID - labelID = args.labelID - relativizePathsToOutput = args.relativizePaths - - # Do some special handling for if users pass a yml file for channel/label IDs - # This is used for MLCube functionality because MLCube does not support plain string inputs. - if channelsID.endswith(".yml") or channelsID.endswith(".yaml"): - if os.path.isfile(channelsID): - with open(channelsID, "r") as f: - content = yaml.safe_load(f) - channelsID = content["channels"] - if isinstance(channelsID, list): - channelsID = ",".join(channelsID) - - if "label" in content: - labelID = content["label"] - if isinstance(labelID, list): - channelsID = ",".join(channelsID) - - writeTrainingCSV(inputDir, channelsID, labelID, outputFile, relativizePathsToOutput) - - -# main function -if __name__ == "__main__": - main() diff --git a/gandlf_deploy b/gandlf_deploy deleted file mode 100644 index cebb6c001..000000000 --- a/gandlf_deploy +++ /dev/null @@ -1,117 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import ast -import os -from GANDLF.cli import ( - deploy_targets, - mlcube_types, - run_deployment, - recover_config, - copyrightMessage, -) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_Deploy", - formatter_class=argparse.RawTextHelpFormatter, - description="Generate frozen/deployable versions of trained GaNDLF models.\n\n" - + copyrightMessage, - ) - - parser.add_argument( - "-m", - "--model", - metavar="", - type=str, - help="Path to the model directory you wish to deploy. Required for model MLCubes, ignored for metrics MLCubes.", - default=None, - ) - parser.add_argument( - "-c", - "--config", - metavar="", - type=str, - default=None, - help="Optional path to an alternative config file to be embedded with the model. If blank/default, we use the previous config from the model instead. Only relevant for model MLCubes. Ignored for metrics MLCubes", - ) - parser.add_argument( - "-t", - "--target", - metavar="", - type=str, - help="The target platform. Valid inputs are: " - + ", ".join(deploy_targets) - + " .", - required=True, - ) - parser.add_argument( - "--mlcube-type", - metavar="", - type=str, - help="The mlcube type. 
Valid inputs are: " + ", ".join(mlcube_types) + " .", - required=True, - ) - parser.add_argument( - "-r", - "--mlcube-root", - metavar="", - type=str, - required=True, - help="Path to an alternative MLCUBE_ROOT directory to use as a template (or a path to a specific mlcube YAML configuration file, in which case we will use the parent directory). The source repository contains an example (https://github.com/mlcommons/GaNDLF/tree/master/mlcube).", - ) - parser.add_argument( - "-o", - "--outputdir", - metavar="", - type=str, - help="Output directory path. For MLCube builds, generates an MLCube directory to be distributed with your MLCube.", - required=True, - ) - parser.add_argument( - "-g", - "--requires-gpu", - metavar="", - type=ast.literal_eval, - help="True if the model requires a GPU by default, False otherwise. Only relevant for model MLCubes. Ignored for metrics MLCubes", - default=True, - ) - parser.add_argument( - "-e", - "--entrypoint", - metavar="", - type=str, - help="An optional custom python entrypoint script to use instead of the default specified in mlcube.yaml. (Only for inference and metrics)", - default=None, - ) - - args = parser.parse_args() - - if not os.path.exists(args.outputdir): - os.makedirs(args.outputdir, exist_ok=True) - - config_to_use = args.config - if not args.config and args.mlcube_type == "model": - result = recover_config(args.model, args.outputdir + "/original_config.yml") - assert ( - result - ), "Error: No config was specified but automatic config extraction failed." - config_to_use = args.outputdir + "/original_config.yml" - - if not args.model and args.mlcube_type == "model": - raise AssertionError( - "Error: a path to a model directory should be provided when deploying a model" - ) - result = run_deployment( - args.mlcube_root, - args.outputdir, - args.target, - args.mlcube_type, - args.entrypoint, - config_to_use, - args.model, - args.requires_gpu, - ) - assert result, "Deployment to the target platform failed." diff --git a/gandlf_generateMetrics b/gandlf_generateMetrics deleted file mode 100644 index a25d663d6..000000000 --- a/gandlf_generateMetrics +++ /dev/null @@ -1,71 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import argparse -import ast -import sys - -from GANDLF import version -from GANDLF.cli import generate_metrics, copyrightMessage - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_Metrics", - formatter_class=argparse.RawTextHelpFormatter, - description="Metrics calculator.\n\n" + copyrightMessage, - ) - parser.add_argument( - "-c", - "--config", - "--parameters_file", - metavar="", - type=str, - required=True, - help="The configuration file (contains all the information related to the training/inference session)", - ) - parser.add_argument( - "-i", - "--inputdata", - "--data_path", - metavar="", - type=str, - required=True, - help="The CSV file of input data that is used to generate the metrics; should contain 3 columns: 'SubjectID,Target,Prediction'", - ) - parser.add_argument( - "-o", - "--outputfile", - "--output_path", - metavar="", - type=str, - default=None, - help="Location to save the output dictionary. If not provided, will print to stdout.", - ) - parser.add_argument( - "-v", - "--version", - action="version", - version="%(prog)s v{}".format(version) + "\n\n" + copyrightMessage, - help="Show program's version number and exit.", - ) - - # This is a dummy argument that exists to trigger MLCube mounting requirements. - # Do not remove. 
- parser.add_argument("-rawinput", "--rawinput", help=argparse.SUPPRESS) - - args = parser.parse_args() - assert args.config is not None, "Missing required parameter: config" - assert args.inputdata is not None, "Missing required parameter: inputdata" - - try: - generate_metrics.generate_metrics_dict( - args.inputdata, - args.config, - args.outputfile, - ) - except Exception as e: - sys.exit("ERROR: " + str(e)) - - print("Finished.") diff --git a/gandlf_optimizeModel b/gandlf_optimizeModel deleted file mode 100644 index d1cc578c0..000000000 --- a/gandlf_optimizeModel +++ /dev/null @@ -1,39 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -from GANDLF.cli import copyrightMessage, post_training_model_optimization - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_OptimizeModel", - formatter_class=argparse.RawTextHelpFormatter, - description="Generate optimized versions of trained GaNDLF models.\n\n" - + copyrightMessage, - ) - - parser.add_argument( - "-m", - "--model", - metavar="", - type=str, - help="Path to the model file (ending in '.pth.tar') you wish to optimize.", - required=True, - ) - parser.add_argument( - "-c", - "--config", - metavar="", - type=str, - default=None, - required=False, - help="The configuration file (contains all the information related to the training/inference session).", - ) - - args = parser.parse_args() - - if post_training_model_optimization(args.model, args.config): - print("Post-training model optimization successful.") - else: - print("Post-training model optimization failed.") diff --git a/gandlf_patchMiner b/gandlf_patchMiner deleted file mode 100644 index 1cb63ec2d..000000000 --- a/gandlf_patchMiner +++ /dev/null @@ -1,46 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -from GANDLF.cli.patch_extraction import patch_extraction - -from GANDLF.cli import copyrightMessage - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_PatchMiner", - formatter_class=argparse.RawTextHelpFormatter, - description="Construct patches from whole slide image(s).\n\n" - + copyrightMessage, - ) - - parser.add_argument( - "-i", - "--input_CSV", - dest="input_path", - help="input path for the tissue", - required=True, - ) - parser.add_argument( - "-o", - "--output_path", - dest="output_path", - default=None, - required=True, - help="output path for the patches", - ) - parser.add_argument( - "-c", - "--config", - type=str, - dest="config", - help="config (in YAML) for running the patch miner. 
Needs 'scale' and 'patch_size' to be defined, otherwise defaults to 16 and (256, 256), respectively.", - required=False, - ) - - args = parser.parse_args() - - patch_extraction(args.input_path, args.output_path, args.config) - - print("Finished.") diff --git a/gandlf_preprocess b/gandlf_preprocess deleted file mode 100644 index 1522f8607..000000000 --- a/gandlf_preprocess +++ /dev/null @@ -1,79 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse - -from GANDLF.cli import preprocess_and_save, copyrightMessage - -# main function -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_Preprocess", - formatter_class=argparse.RawTextHelpFormatter, - description="Generate training/inference data which are preprocessed to reduce resource footprint during computation.\n\n" - + copyrightMessage, - ) - parser.add_argument( - "-c", - "--config", - metavar="", - type=str, - help="The configuration file (contains all the information related to the training/inference session), this is read from 'output' during inference", - required=True, - ) - parser.add_argument( - "-i", - "--inputdata", - metavar="", - type=str, - help="Data csv file that is used for training/inference", - required=True, - ) - parser.add_argument( - "-o", - "--output", - metavar="", - type=str, - help="Output directory to save intermediate files and model weights", - required=True, - ) - parser.add_argument( - "-l", - "--labelPad", - metavar="", - type=str, - default="constant", - help="This specifies the padding strategy for the label when 'patch_sampler' is 'label'. Defaults to 'constant' [full list: https://numpy.org/doc/stable/reference/generated/numpy.pad.html]", - required=False, - ) - parser.add_argument( - "-a", - "--applyaugs", - metavar="", - type=bool, - default=False, - help="This specifies the whether to apply data augmentation during output creation. Defaults to False", - required=False, - ) - parser.add_argument( - "-a", - "--cropzero", - metavar="", - type=bool, - default=False, - help="This specifies the whether to apply zero cropping during output creation. Defaults to False", - required=False, - ) - - args = parser.parse_args() - - preprocess_and_save( - args.inputdata, - args.config, - args.output, - args.labelPad, - args.applyaugs, - args.cropzero, - ) - - print("Finished.") diff --git a/gandlf_recoverConfig b/gandlf_recoverConfig deleted file mode 100644 index 37653fe04..000000000 --- a/gandlf_recoverConfig +++ /dev/null @@ -1,50 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -from GANDLF.cli import copyrightMessage, recover_config -import pickle -import os, sys -import yaml - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_RecoverConfig", - formatter_class=argparse.RawTextHelpFormatter, - description="Recovers a config file from a GaNDLF model. If used from within a deployed GaNDLF MLCube, attempts to extract the config from the embedded model.\n\n" - + copyrightMessage, - ) - - parser.add_argument( - "-m", - "--modeldir", - metavar="", - default="", - type=str, - help="Path to the model directory.", - ) - parser.add_argument( - "-c", - "--mlcube", - metavar="", - type=str, - help="Pass this option to attempt to extract the config from the embedded model in a GaNDLF MLCube (if any). 
Only useful in that context.", - ) - parser.add_argument( - "-o", - "--outputFile", - metavar="", - type=str, - help="Path to an output file where the config will be written.", - ) - - args = parser.parse_args() - - if args.mlcube: - search_dir = "/embedded_model/" - else: - search_dir = args.modeldir - - result = recover_config(search_dir, args.outputFile) - assert result, "Config file recovery failed." diff --git a/gandlf_run b/gandlf_run deleted file mode 100644 index 0186b69e0..000000000 --- a/gandlf_run +++ /dev/null @@ -1,139 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import argparse -import ast -import sys -import traceback - -from GANDLF import version -from GANDLF.cli import main_run, copyrightMessage - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF", - formatter_class=argparse.RawTextHelpFormatter, - description="Semantic segmentation, regression, and classification for medical images using Deep Learning.\n\n" - + copyrightMessage, - ) - parser.add_argument( - "-c", - "--config", - "--parameters_file", - metavar="", - type=str, - required=True, - help="The configuration file (contains all the information related to the training/inference session)", - ) - parser.add_argument( - "-i", - "--inputdata", - "--data_path", - metavar="", - type=str, - required=True, - help="Data CSV file that is used for training/inference; can also take comma-separated training-validation pre-split CSVs", - ) - parser.add_argument( - "-t", - "--train", - metavar="", - type=ast.literal_eval, - required=True, - help="True: training and False: inference; for inference, there needs to be a compatible model saved in '-modeldir'", - ) - parser.add_argument( - "-m", - "--modeldir", - metavar="", - type=str, - help="Training: Output directory to save intermediate files and model weights; inference: location of previous training session output", - ) - parser.add_argument( - "-d", - "--device", - default="cuda", - metavar="", - type=str, - required=True, - help="Device to perform requested session on 'cpu' or 'cuda'; for cuda, ensure CUDA_VISIBLE_DEVICES env var is set", - ) - parser.add_argument( - "-rt", - "--reset", - metavar="", - default=False, - type=ast.literal_eval, - help="Completely resets the previous run by deleting 'modeldir'", - ) - parser.add_argument( - "-rm", - "--resume", - metavar="", - default=False, - type=ast.literal_eval, - help="Resume previous training by only keeping model dict in 'modeldir'", - ) - parser.add_argument( - "-o", - "--outputdir", - "--output_path", - metavar="", - type=str, - help="Location to save the output of the inference session. Not used for training.", - ) - parser.add_argument( - "-v", - "--version", - action="version", - version="%(prog)s v{}".format(version) + "\n\n" + copyrightMessage, - help="Show program's version number and exit.", - ) - - # This is a dummy argument that exists to trigger MLCube mounting requirements. - # Do not remove. - parser.add_argument("-rawinput", "--rawinput", help=argparse.SUPPRESS) - - args = parser.parse_args() - if args.modeldir is None and args.outputdir: - args.modeldir = args.outputdir - - assert args.modeldir is not None, "Missing required parameter: modeldir" - - if os.path.isdir(args.inputdata): - # Is this a fine assumption to make? 
- # Medperf models receive the data generated by the data preparator mlcube - # We can therefore ensure the output of that mlcube contains a data.csv file - filename = "data.csv" - args.inputdata = os.path.join(args.inputdata, filename) - - if not args.train: - # if inference mode, then no need to check for reset/resume - args.reset, args.resume = False, False - - if args.reset and args.resume: - print( - "WARNING: 'reset' and 'resume' are mutually exclusive; 'resume' will be used." - ) - args.reset = False - - # config file should always be present - assert os.path.isfile(args.config), "Configuration file not found!" - - try: - main_run( - args.inputdata, - args.config, - args.modeldir, - args.train, - args.device, - args.resume, - args.reset, - args.outputdir, - ) - except Exception: - sys.exit("ERROR: " + traceback.format_exc()) - - print("Finished.") diff --git a/gandlf_splitCSV b/gandlf_splitCSV deleted file mode 100644 index 339fec2ac..000000000 --- a/gandlf_splitCSV +++ /dev/null @@ -1,66 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import os, argparse, sys, yaml -from GANDLF.cli import copyrightMessage, split_data_and_save_csvs - - -def main(): - parser = argparse.ArgumentParser( - prog="GANDLF_SplitCSV", - formatter_class=argparse.RawTextHelpFormatter, - description="Split the data into training, validation, and testing sets and save them as csvs in the output directory.\n\n" - + copyrightMessage, - ) - parser.add_argument( - "-i", - "--inputCSV", - metavar="", - default=None, - type=str, - required=True, - help="Input CSV file which contains the data to be split.", - ) - parser.add_argument( - "-c", - "--config", - metavar="", - default=None, - required=True, - type=str, - help="The GaNDLF config (in YAML) with the `nested_training` key specified to the folds needed.", - ) - parser.add_argument( - "-o", - "--outputDir", - metavar="", - default=None, - type=str, - required=True, - help="Output directory to save the split data.", - ) - - args = parser.parse_args() - - # check for required parameters - this is needed here to keep the cli clean - for param_none_check in [args.inputCSV, args.outputDir, args.config]: - if param_none_check is None: - sys.exit("ERROR: Missing required parameter:", param_none_check) - - inputCSV = os.path.normpath(args.inputCSV) - outputDir = os.path.normpath(args.outputDir) - # initialize default - config = {"nested_training": {"testing": 5, "validation": 5}} - if os.path.isfile(args.config): - config = yaml.safe_load(open(args.config, "r")) - - print("Config used for split:", config) - - split_data_and_save_csvs(inputCSV, outputDir, config) - - print("Finished successfully.") - - -# main function -if __name__ == "__main__": - main() diff --git a/gandlf_verifyInstall b/gandlf_verifyInstall deleted file mode 100644 index 6955bc152..000000000 --- a/gandlf_verifyInstall +++ /dev/null @@ -1,32 +0,0 @@ -#!usr/bin/env python -# -*- coding: utf-8 -*- - -import os, argparse, sys - - -# main function -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_VerifyInstall", - formatter_class=argparse.RawTextHelpFormatter, - description="Verify GaNDLF installation.", - ) - - try: - import GANDLF as gf - - print("GaNDLF installed version:", gf.__version__) - except: - raise Exception( - "GaNDLF not properly installed, please see https://mlcommons.github.io/GaNDLF/setup" - ) - - # we always want to do submodule update to ensure any hash updates are ingested correctly - try: - os.system(f"{sys.executable} -m pip install -e .") - 
except: - print("Git was not found, please try again.") - - args = parser.parse_args() - - print("GaNDLF is ready. See https://mlcommons.github.io/GaNDLF/usage") diff --git a/mlcube/metrics_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py b/mlcube/metrics_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py index 88f5cf2ad..275cfc5da 100644 --- a/mlcube/metrics_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py +++ b/mlcube/metrics_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py @@ -1,6 +1,7 @@ import os import argparse import sys +import logging import pandas as pd @@ -40,29 +41,29 @@ def create_csv(predictions, labels): input_data_df.to_csv("./data.csv", index=False) -def run_gandlf(output_path, parameters_file): +def run_gandlf(output_file, config): """ A function that calls GaNDLF's generate metrics command with the previously created csv. Args: - output_path (str): The path to the output file/folder - parameters_file (str): The path to the parameters file + output_file (str): The path to the output file/folder + config (str): The path to the parameters file """ exit_status = os.system( - f"python3.9 gandlf_generateMetrics -c {parameters_file} -i ./data.csv -o {output_path}" + f"gandlf generate-metrics -c {config} -i ./data.csv -o {output_file}" ) exit_code = os.WEXITSTATUS(exit_status) - sys.exit(exit_code) + logging.info(exit_code) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--parameters_file", metavar="", type=str, required=True) + parser.add_argument("--config", metavar="", type=str, required=True) parser.add_argument("--predictions", metavar="", type=str, required=True) - parser.add_argument("--output_path", metavar="", type=str, default=None) + parser.add_argument("--output-file", metavar="", type=str, default=None) parser.add_argument("--labels", metavar="", type=str, required=True) args = parser.parse_args() create_csv(args.predictions, args.labels) - run_gandlf(args.output_path, args.parameters_file) + run_gandlf(args.output_file, args.config) diff --git a/mlcube/metrics_mlcube/example_custom_entrypoint/template.py b/mlcube/metrics_mlcube/example_custom_entrypoint/template.py index c3f7d059b..5c9a31391 100644 --- a/mlcube/metrics_mlcube/example_custom_entrypoint/template.py +++ b/mlcube/metrics_mlcube/example_custom_entrypoint/template.py @@ -7,6 +7,7 @@ import os import argparse import sys +import logging def create_csv(predictions, labels): @@ -15,29 +16,29 @@ def create_csv(predictions, labels): raise NotImplementedError -def run_gandlf(output_path, parameters_file): +def run_gandlf(output_file, config): """ A function that calls GaNDLF's generate metrics command with the previously created csv. 
Args: - output_path (str): The path to the output file/folder - parameters_file (str): The path to the parameters file + output_file (str): The path to the output file/folder + config (str): The path to the parameters file """ exit_status = os.system( - f"python3.9 gandlf_generateMetrics -c {parameters_file} -i ./data.csv -o {output_path}" + f"gandlf generate-metrics -c {config} -i ./data.csv -o {output_file}" ) exit_code = os.WEXITSTATUS(exit_status) - sys.exit(exit_code) + logging.info(exit_code) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--parameters_file", metavar="", type=str, required=True) + parser.add_argument("--config", metavar="", type=str, required=True) parser.add_argument("--predictions", metavar="", type=str, required=True) - parser.add_argument("--output_path", metavar="", type=str, default=None) + parser.add_argument("--output-file", metavar="", type=str, default=None) parser.add_argument("--labels", metavar="", type=str, required=True) args = parser.parse_args() create_csv(args.predictions, args.labels) - run_gandlf(args.output_path, args.parameters_file) + run_gandlf(args.output_file, args.config) diff --git a/mlcube/metrics_mlcube/mlcube.yaml b/mlcube/metrics_mlcube/mlcube.yaml index 4fd8005a8..f8d686e0b 100644 --- a/mlcube/metrics_mlcube/mlcube.yaml +++ b/mlcube/metrics_mlcube/mlcube.yaml @@ -1,6 +1,6 @@ ## This YAML file contains MLCube configuration. -## The gandlf_deployMetrics tool looks for this file to generate your metrics calculation MLCube image. -## If you are an author, this file (or the derivative generated by gandlf_deployMetrics) +## The `gandlf deploy` tool looks for this file to generate your metrics calculation MLCube image. +## If you are an author, this file (or the derivative generated by `gandlf deploy`) ## can be distributed along with your container to enable use as an MLCube. ## See the MLCube specifications (ex: https://mlcommons.github.io/mlcube/runners/) for additional options. @@ -24,7 +24,7 @@ docker: image: mlcommons/gandlf:0.0.1 ## Generally, these build options will only be needed by GaNDLF maintainers. - # Docker build context relative to $MLCUBE_ROOT. (gandlf_deploy can handle this automatically.) + # Docker build context relative to $MLCUBE_ROOT. (`gandlf deploy` can handle this automatically.) build_context: "../" # Docker file name within docker build context. Any "Dockerfile-*" in the GaNDLF source repo is valid. build_file: "Dockerfile-CPU" @@ -42,11 +42,11 @@ singularity: tasks: evaluate: # Runs metrics calculation on predictions - entrypoint: "python3.9 gandlf_generateMetrics" + entrypoint: "gandlf generate-metrics" parameters: inputs: { - data_path: data/, + input-data: data/, # GaNDLF config file - parameters_file: {type: file, default: config.yaml} + config: {type: file, default: config.yaml} } - outputs: { output_path: { type: "file", default: "results.yaml" } } + outputs: { output-file: { type: "file", default: "results.yaml" } } diff --git a/mlcube/metrics_mlcube/mlcube_medperf.yaml b/mlcube/metrics_mlcube/mlcube_medperf.yaml index 80a73d4b9..c077360b0 100644 --- a/mlcube/metrics_mlcube/mlcube_medperf.yaml +++ b/mlcube/metrics_mlcube/mlcube_medperf.yaml @@ -1,6 +1,6 @@ ## This YAML file contains MLCube configuration. -## The gandlf_deployMetrics tool looks for this file to generate your metrics calculation MLCube image. 
-## If you are an author, this file (or the derivative generated by gandlf_deployMetrics) +## The `gandlf deploy` tool looks for this file to generate your metrics calculation MLCube image. +## If you are an author, this file (or the derivative generated by `gandlf deploy`) ## can be distributed along with your container to enable use as an MLCube. ## See the MLCube specifications (ex: https://mlcommons.github.io/mlcube/runners/) for additional options. @@ -24,7 +24,7 @@ docker: image: mlcommons/gandlf-metrics:0.0.1 ## Generally, these build options will only be needed by GaNDLF maintainers. - # Docker build context relative to $MLCUBE_ROOT. (gandlf_deploy can handle this automatically.) + # Docker build context relative to $MLCUBE_ROOT. (`gandlf deploy` can handle this automatically.) build_context: "../" # Docker file name within docker build context. Any "Dockerfile-*" in the GaNDLF source repo is valid. build_file: "Dockerfile-CPU" @@ -48,6 +48,6 @@ tasks: predictions: predictions/, labels: labels/, # GaNDLF config file. The name should be `parameters.yaml` - parameters_file: {type: file, default: parameters.yaml} + config: {type: file, default: parameters.yaml} } - outputs: { output_path: { type: "file", default: "results.yaml" } } + outputs: { output-file: { type: "file", default: "results.yaml" } } diff --git a/mlcube/model_mlcube/README.md b/mlcube/model_mlcube/README.md index a351ca5d6..5b4595178 100644 --- a/mlcube/model_mlcube/README.md +++ b/mlcube/model_mlcube/README.md @@ -2,7 +2,7 @@ This directory is a template for creating MLCube directories that GaNDLF can deploy. -The `workspace` directory contains a sample `channelIDs.yml` that can be passed to the [`gandlf_constructCSV`](https://mlcommons.github.io/GaNDLF/usage/#constructing-the-data-csv) task and a `config.yml` example used for training/inference. However, generally, the workspace will be populated with a user's own files. +The `workspace` directory contains a sample `channelIDs.yml` that can be passed to the [`gandlf construct-csv`](https://mlcommons.github.io/GaNDLF/usage/#constructing-the-data-csv) task and a `config.yml` example used for training/inference. However, generally, the workspace will be populated with a user's own files. It is recommended that you distribute at least `config.yml` and a `mlcube.yaml` alongside your MLCube. -However, the `gandlf_recoverConfig` task allows users to recover a usable configuration from the MLCube itself, if needed. +However, the `gandlf recover-config` task allows users to recover a usable configuration from the MLCube itself, if needed. diff --git a/mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py b/mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py index 2c56b5c56..9e3600103 100644 --- a/mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py +++ b/mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py @@ -1,6 +1,7 @@ import os import argparse import sys +import logging import pandas as pd @@ -22,32 +23,34 @@ def create_csv(data_path): input_data_df.to_csv("./data.csv", index=False) -def run_gandlf(output_path, device): +def run_gandlf(output_path, model_dir, device): """ A function that calls GaNDLF's run command with the previously created csv. Args: output_path (str): The path to the output file/folder + model_dir (str): The path where model is stored device (str): device to run on (i.e. 
CPU or GPU) """ exit_status = os.system( - "python3.9 gandlf_run --train False " + "gandlf run --infer " f"--device {device} --config /embedded_config.yml " - f"--modeldir /embedded_model/ -i ./data.csv -o {output_path}" + f"--model-dir /embedded_model/ -i ./data.csv -o {output_path}" ) exit_code = os.WEXITSTATUS(exit_status) - sys.exit(exit_code) + logging.info(exit_code) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--data_path", metavar="", type=str, required=True) - parser.add_argument("--output_path", metavar="", type=str, default=None) + parser.add_argument("--input-data", metavar="", type=str, required=True) + parser.add_argument("--output-path", metavar="", type=str, default=None) + parser.add_argument("--model-dir", metavar="", type=str, default=None) parser.add_argument( "--device", metavar="", type=str, required=True, choices=["cpu", "cuda"] ) args = parser.parse_args() - create_csv(args.data_path) - run_gandlf(args.output_path, args.device) + create_csv(args.input_data) + run_gandlf(args.output_path, args.model_dir, args.device) diff --git a/mlcube/model_mlcube/example_custom_entrypoint/template.py b/mlcube/model_mlcube/example_custom_entrypoint/template.py index b146c9acd..614269d0d 100644 --- a/mlcube/model_mlcube/example_custom_entrypoint/template.py +++ b/mlcube/model_mlcube/example_custom_entrypoint/template.py @@ -2,12 +2,13 @@ a custom entrypoint is needed to create a temporary csv file before calling GaNDLF's run command. This script should expect the same arguments passed to the command `mlcube run --task infer`, i.e. it should expect the inputs and outputs defined in `mlcube.yaml` in the `infer` task. -Note that the device argument will be set by gandlf_deploy (gandlf_deploy will run the entrypoint +Note that the device argument will be set by `gandlf deploy` (`gandlf deploy` will run the entrypoint with --device).""" import os import argparse import sys +import logging def create_csv(data_path): @@ -25,23 +26,23 @@ def run_gandlf(output_path, device): parameters_file (str): The path to the parameters file """ exit_status = os.system( - "python3.9 gandlf_run --train False " + "gandlf run --infer " f"--device {device} --config /embedded_config.yml " - f"--modeldir /embedded_model/ -i ./data.csv -o {output_path}" + f"--model-dir /embedded_model/ -i ./data.csv -o {output_path}" ) exit_code = os.WEXITSTATUS(exit_status) - sys.exit(exit_code) + logging.info(exit_code) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--data_path", metavar="", type=str, required=True) - parser.add_argument("--output_path", metavar="", type=str, default=None) + parser.add_argument("--input-data", metavar="", type=str, required=True) + parser.add_argument("--output-path", metavar="", type=str, default=None) parser.add_argument( "--device", metavar="", type=str, required=True, choices=["cpu", "cuda"] ) args = parser.parse_args() - create_csv(args.data_path) + create_csv(args.input_data) run_gandlf(args.output_path, args.device) diff --git a/mlcube/model_mlcube/mlcube.yaml b/mlcube/model_mlcube/mlcube.yaml index a2cddab5a..42a651278 100644 --- a/mlcube/model_mlcube/mlcube.yaml +++ b/mlcube/model_mlcube/mlcube.yaml @@ -1,6 +1,6 @@ ## This YAML file contains MLCube configuration. -## The gandlf_deploy tool looks for this file to generate your embedded-model MLCube image. 
-## If you are a model author, this file (or the derivative generated by gandlf_deploy) +## The `gandlf deploy` tool looks for this file to generate your embedded-model MLCube image. +## If you are a model author, this file (or the derivative generated by `gandlf deploy`) ## can be distributed along with your model container to enable use as an MLCube. ## See the MLCube specifications (ex: https://mlcommons.github.io/mlcube/runners/) for additional options. @@ -51,56 +51,56 @@ singularity: tasks: train: # Trains a new model, creating a model directory, or resumes training on an existing model. - entrypoint: "python3.9 gandlf_run --train True --device cpu" + entrypoint: "gandlf run --train --device cpu" parameters: inputs: { # Path to a data csv such as that constructed by the "construct_csv" task. - inputdata: {type: "file", default: "data.csv"}, + input-data: {type: "file", default: "data.csv"}, # Path to a GaNDLF config file. See samples for more examples. config: {type: "file", default: "config.yml"}, } outputs: { # Path to a model directory. Not used if deploying an embedded model. - modeldir: {type: "directory", default: "model/"}, + model-dir: {type: "directory", default: "model/"}, } infer: # Runs inference on some existing model given new data - entrypoint: "python3.9 gandlf_run --train False --device cpu" + entrypoint: "gandlf run --infer --device cpu" parameters: inputs: { # Path to a data csv such as that constructed by the "construct_csv" task. - inputdata: {type: "file", default: "data.csv"}, + input-data: {type: "file", default: "data.csv"}, # Path to a GaNDLF config file. See samples for more examples. # Currently disabled -- inference defaults to using the model's config. #config: {type: "file", default: "config.yml"}, # Path to a model directory. Not used if deploying an embedded model. - modeldir: {type: "directory", default: "model/"}, + model-dir: {type: "directory", default: "model/"}, #device: {type: "str", default: "cpu"}, - #parameters_file: {type: file, default: parameters.yaml} + #config: {type: file, default: parameters.yaml} } - outputs: {output_path: {type: "directory", default: "inference_results"}} + outputs: {output-path: {type: "directory", default: "inference_results"}} construct_csv: # Constructs a data csv from a data directory that can be passed to future steps, to prevent issues with path translation between host and container. - entrypoint: "python3.9 gandlf_constructCSV --relativizePaths True" + entrypoint: "gandlf construct-csv --relativize-paths" parameters: inputs: { # Do NOT change the position of the inputDir parameter! It is relevant due to MLCube mounting rules. # Path to a directory containing input data. Each subject should be a subdirectory, with consistent filenaming conventions. - inputDir: {type: "directory", default: "data/"}, + input-dir: {type: "directory", default: "data/"}, # Path to a file containing identifying strings for each channel (and label, if performing segmentation). - channelsID: {type: "file", default: "channelIDs.yml"}, + channels-id: {type: "file", default: "channelIDs.yml"}, } outputs: { - outputFile: {type: "file", default: "data.csv"} + output-file: {type: "file", default: "data.csv"} } recover_config: # Extracts the config file from the embedded model (if any) in the MLCube. 
- entrypoint: "python3.9 gandlf_recoverConfig --mlcube internal" + entrypoint: "gandlf recover-config --mlcube" parameters: outputs: { - outputFile: {type: "file", default: "recovered_config.yml"}, + output-file: {type: "file", default: "recovered_config.yml"}, } \ No newline at end of file diff --git a/mlcube/model_mlcube/mlcube_medperf.yaml b/mlcube/model_mlcube/mlcube_medperf.yaml index 8386f544b..712800d5c 100644 --- a/mlcube/model_mlcube/mlcube_medperf.yaml +++ b/mlcube/model_mlcube/mlcube_medperf.yaml @@ -1,6 +1,6 @@ ## This YAML file contains MLCube configuration. -## The gandlf_deploy tool looks for this file to generate your embedded-model MLCube image. -## If you are a model author, this file (or the derivative generated by gandlf_deploy) +## The `gandlf deploy` tool looks for this file to generate your embedded-model MLCube image. +## If you are a model author, this file (or the derivative generated by `gandlf deploy`) ## can be distributed along with your model container to enable use as an MLCube. ## See the MLCube specifications (ex: https://mlcommons.github.io/mlcube/runners/) for additional options. @@ -27,7 +27,7 @@ docker: image: mlcommons/gandlf-model:0.0.1 ## Generally, these build options will only be needed by GaNDLF maintainers. - # Docker build context relative to $MLCUBE_ROOT. (gandlf_deploy can handle this automatically.) + # Docker build context relative to $MLCUBE_ROOT. (`gandlf deploy` can handle this automatically.) build_context: "../" # Docker file name within docker build context. Any "Dockerfile-*" in the GaNDLF source repo is valid. build_file: "Dockerfile-CPU" @@ -51,57 +51,57 @@ singularity: tasks: train: # Trains a new model, creating a model directory, or resumes training on an existing model. - entrypoint: "python3.9 gandlf_run --train True --device cpu" + entrypoint: "gandlf run --train --device cpu" parameters: inputs: { # Path to a data csv such as that constructed by the "construct_csv" task. - data_path: {type: "directory", default: "data/"}, + input-data: {type: "directory", default: "data/"}, # Path to a GaNDLF config file. See samples for more examples. - parameters_file: {type: "file", default: "config.yml"}, + config: {type: "file", default: "config.yml"}, } outputs: { # Path to a model directory. Not used if deploying an embedded model. - output_path: {type: "directory", default: "model/"}, + output-path: {type: "directory", default: "model/"}, } infer: # Runs inference on some existing model given new data - entrypoint: "python3.9 gandlf_run --train False --device cpu" + entrypoint: "gandlf run --infer --device cpu" parameters: inputs: { # Path to a data csv such as that constructed by the "construct_csv" task. - data_path: {type: "directory", default: "data/"}, + input-data: {type: "directory", default: "data/"}, # Path to a GaNDLF config file. See samples for more examples. - #parameters_file: {type: "file", default: "config.yml"}, + #config: {type: "file", default: "config.yml"}, # Path to a model directory. Not used if deploying an embedded model. 
- modeldir: {type: "directory", default: "additional_files/model/"}, + model-dir: {type: "directory", default: "additional_files/model/"}, #device: {type: "str", default: "cpu"}, - #parameters_file: {type: file, default: parameters.yaml} + #config: {type: file, default: parameters.yaml} } - outputs: {output_path: {type: "directory", default: "inference_results"}} + outputs: {output-path: {type: "directory", default: "inference_results"}} # There are two construct_csv tasks below to make it more convenient for users to specify inference and training datasets. construct_csv: # Constructs a data csv from a data directory that can be passed to future steps, to prevent issues with path translation between host and container. - entrypoint: "python3.9 gandlf_constructCSV --relativizePaths True" + entrypoint: "gandlf construct-csv --relativize-paths" parameters: inputs: { # Do NOT change the position of the inputDir parameter! It is relevant due to MLCube mounting rules. # Path to a directory containing input data. Each subject should be a subdirectory, with consistent filenaming conventions. - inputDir: {type: "directory", default: "data/"}, + input-dir: {type: "directory", default: "data/"}, # Path to a file containing identifying strings for each channel (and label, if performing segmentation). - channelsID: {type: "file", default: "channelIDs.yml"}, + channels-id: {type: "file", default: "channelIDs.yml"}, } outputs: { - outputFile: {type: "file", default: "data.csv"} + output-file: {type: "file", default: "data.csv"} } recover_config: # Extracts the config file from the embedded model (if any) in the MLCube. - entrypoint: "python3.9 gandlf_recoverConfig --mlcube internal" + entrypoint: "gandlf recover-config --mlcube" parameters: outputs: { - outputFile: {type: "file", default: "recovered_config.yml"}, + output-file: {type: "file", default: "recovered_config.yml"}, } diff --git a/pyproject.toml b/pyproject.toml index c8caf08a3..67a73e9dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ omit = [ ".github/*", ".devcontainer/*", "./setup.py", - "./gandlf_*", "./testing/conftest.py", "./tutorials/*", ] diff --git a/samples/config_all_options.yaml b/samples/config_all_options.yaml index 3c117aa68..f460ac252 100644 --- a/samples/config_all_options.yaml +++ b/samples/config_all_options.yaml @@ -1,8 +1,8 @@ # affix version version: { - minimum: 0.0.20, - maximum: 0.0.20 # this should NOT be made a variable, but should be tested after every tag is created + minimum: 0.1.0-dev, + maximum: 0.1.0-dev # this should NOT be made a variable, but should be tested after every tag is created } ## Choose the model parameters here model: diff --git a/samples/config_classification.yaml b/samples/config_classification.yaml index 9795ffca8..23fdc6ff4 100644 --- a/samples/config_classification.yaml +++ b/samples/config_classification.yaml @@ -1,8 +1,8 @@ # affix version version: { - minimum: 0.0.20, - maximum: 0.0.20 # this should NOT be made a variable, but should be tested after every tag is created + minimum: 0.1.0-dev, + maximum: 0.1.0-dev # this should NOT be made a variable, but should be tested after every tag is created } # Choose the model parameters here model: diff --git a/samples/config_getting_started_classification_histo2d.yaml b/samples/config_getting_started_classification_histo2d.yaml index e9b4e6208..34b5d069f 100644 --- a/samples/config_getting_started_classification_histo2d.yaml +++ b/samples/config_getting_started_classification_histo2d.yaml @@ -94,6 +94,6 @@ scheduler: 
track_memory_usage: false verbose: false version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: true diff --git a/samples/config_getting_started_classification_rad3d.yaml b/samples/config_getting_started_classification_rad3d.yaml index 3d5466212..e374ee82a 100644 --- a/samples/config_getting_started_classification_rad3d.yaml +++ b/samples/config_getting_started_classification_rad3d.yaml @@ -99,6 +99,6 @@ scheduler: track_memory_usage: false verbose: false version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: true diff --git a/samples/config_getting_started_regression_histo2d.yaml b/samples/config_getting_started_regression_histo2d.yaml index 9118263ed..1b325595d 100644 --- a/samples/config_getting_started_regression_histo2d.yaml +++ b/samples/config_getting_started_regression_histo2d.yaml @@ -59,6 +59,6 @@ scheduler: track_memory_usage: false verbose: false version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: true diff --git a/samples/config_getting_started_regression_rad3d.yaml b/samples/config_getting_started_regression_rad3d.yaml index 4a98b1a4f..bb1e3f1e6 100644 --- a/samples/config_getting_started_regression_rad3d.yaml +++ b/samples/config_getting_started_regression_rad3d.yaml @@ -62,6 +62,6 @@ scheduler: track_memory_usage: false verbose: false version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: false diff --git a/samples/config_getting_started_segmentation_histo2d.yaml b/samples/config_getting_started_segmentation_histo2d.yaml index 97deb0e34..6640e5a0b 100644 --- a/samples/config_getting_started_segmentation_histo2d.yaml +++ b/samples/config_getting_started_segmentation_histo2d.yaml @@ -66,6 +66,6 @@ scheduler: track_memory_usage: false verbose: true version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: true diff --git a/samples/config_getting_started_segmentation_rad3d.yaml b/samples/config_getting_started_segmentation_rad3d.yaml index c05256426..029b18e7a 100644 --- a/samples/config_getting_started_segmentation_rad3d.yaml +++ b/samples/config_getting_started_segmentation_rad3d.yaml @@ -89,6 +89,6 @@ scheduler: track_memory_usage: false verbose: true version: - maximum: 0.0.20 - minimum: 0.0.20 + maximum: 0.1.0-dev + minimum: 0.1.0-dev weighted_loss: true diff --git a/samples/config_regression.yaml b/samples/config_regression.yaml index ce7b2c806..d3757afcc 100644 --- a/samples/config_regression.yaml +++ b/samples/config_regression.yaml @@ -1,8 +1,8 @@ # affix version version: { - minimum: 0.0.20, - maximum: 0.0.20 # this should NOT be made a variable, but should be tested after every tag is created + minimum: 0.1.0-dev, + maximum: 0.1.0-dev # this should NOT be made a variable, but should be tested after every tag is created } # Choose the model parameters here model: diff --git a/samples/config_segmentation_brats.yaml b/samples/config_segmentation_brats.yaml index e90d5a92c..33ae7378e 100644 --- a/samples/config_segmentation_brats.yaml +++ b/samples/config_segmentation_brats.yaml @@ -1,8 +1,8 @@ # affix version version: { - minimum: 0.0.20, - maximum: 0.0.20 # this should NOT be made a variable, but should be tested after every tag is created + minimum: 0.1.0-dev, + maximum: 0.1.0-dev # this should NOT be made a variable, but should be tested after every tag is created } # Choose the model parameters here model: diff --git a/samples/config_segmentation_histology.yaml b/samples/config_segmentation_histology.yaml index 6551b50c9..cedebe7ce 100644 --- 
a/samples/config_segmentation_histology.yaml +++ b/samples/config_segmentation_histology.yaml @@ -1,8 +1,8 @@ # affix version version: { - minimum: 0.0.20, - maximum: 0.0.20 # this should NOT be made a variable, but should be tested after every tag is created + minimum: 0.1.0-dev, + maximum: 0.1.0-dev # this should NOT be made a variable, but should be tested after every tag is created } # Choose the model parameters here model: diff --git a/setup.py b/setup.py index 7eb535233..1ee6a3f72 100644 --- a/setup.py +++ b/setup.py @@ -5,9 +5,6 @@ import sys, re, os from setuptools import setup, find_packages -from setuptools.command.install import install -from setuptools.command.develop import develop -from setuptools.command.egg_info import egg_info try: with open("README.md") as readme_file: @@ -17,21 +14,6 @@ sys.stderr.write("Warning: Could not open '%s' due %s\n" % ("README.md", error)) -class CustomInstallCommand(install): - def run(self): - install.run(self) - - -class CustomDevelopCommand(develop): - def run(self): - develop.run(self) - - -class CustomEggInfoCommand(egg_info): - def run(self): - egg_info.run(self) - - try: filepath = "GANDLF/version.py" version_file = open(filepath) @@ -47,36 +29,15 @@ def run(self): for item in os.listdir(os.path.dirname(os.path.abspath(__file__))) if (os.path.isfile(item) and item.startswith("Dockerfile-")) ] -entrypoint_files = [ - item - for item in os.listdir(os.path.dirname(os.path.abspath(__file__))) - if (os.path.isfile(item) and item.startswith("gandlf_")) -] -setup_files = ["setup.py", ".dockerignore", "pyproject.toml", "MANIFEST.in"] -all_extra_files = dockerfiles + entrypoint_files + setup_files -all_extra_files_pathcorrected = [os.path.join("../", item) for item in all_extra_files] -# find_packages should only ever find these as subpackages of gandlf, not as top-level packages -# generate this dynamically? 
-# GANDLF.GANDLF is needed to prevent recursion madness in deployments -toplevel_package_excludes = [ - "GANDLF.GANDLF", - "anonymize", - "cli", - "compute", - "data", - "grad_clipping", - "losses", - "metrics", - "models", - "optimizers", - "schedulers", - "utils", -] + +# Any extra files should be located at `GANDLF` module folder (not in repo root) +extra_files = ["logging_config.yaml"] +toplevel_package_excludes = ["testing*"] # specifying version for `black` separately because it is also used to [check for lint](https://github.com/mlcommons/GaNDLF/blob/master/.github/workflows/black.yml) black_version = "23.11.0" requirements = [ - "torch==2.1.2", + "torch==2.2.1", f"black=={black_version}", "numpy==1.25.0", "scipy", @@ -84,7 +45,7 @@ def run(self): "SimpleITK!=2.2.1", # https://github.com/mlcommons/GaNDLF/issues/536 "torchvision", "tqdm", - "torchio==0.19.5", + "torchio==0.19.6", "pandas>=2.0.0", "scikit-learn>=0.23.2", "scikit-image>=0.19.1", @@ -115,8 +76,11 @@ def run(self): "zarr", "keyring", "monai==1.3.0", + "click>=8.0.0", + "deprecated", "packaging==24.0", "typer==0.9.0", + "colorlog", ] if __name__ == "__main__": @@ -130,25 +94,26 @@ def run(self): where=os.path.dirname(os.path.abspath(__file__)), exclude=toplevel_package_excludes, ), - cmdclass={ - "install": CustomInstallCommand, - "develop": CustomDevelopCommand, - "egg_info": CustomEggInfoCommand, + entry_points={ + "console_scripts": [ + "gandlf=GANDLF.entrypoints.cli_tool:gandlf", + # old entrypoints + "gandlf_run=GANDLF.entrypoints.run:old_way", + "gandlf_constructCSV=GANDLF.entrypoints.construct_csv:old_way", + "gandlf_collectStats=GANDLF.entrypoints.collect_stats:old_way", + "gandlf_patchMiner=GANDLF.entrypoints.patch_miner:old_way", + "gandlf_preprocess=GANDLF.entrypoints.preprocess:old_way", + "gandlf_anonymizer=GANDLF.entrypoints.anonymizer:old_way", + "gandlf_configGenerator=GANDLF.entrypoints.config_generator:old_way", + "gandlf_verifyInstall=GANDLF.entrypoints.verify_install:old_way", + "gandlf_recoverConfig=GANDLF.entrypoints.recover_config:old_way", + "gandlf_deploy=GANDLF.entrypoints.deploy:old_way", + "gandlf_optimizeModel=GANDLF.entrypoints.optimize_model:old_way", + "gandlf_generateMetrics=GANDLF.entrypoints.generate_metrics:old_way", + "gandlf_debugInfo=GANDLF.entrypoints.debug_info:old_way", + "gandlf_splitCSV=GANDLF.entrypoints.split_csv:old_way", + ] }, - scripts=[ - "gandlf_run", - "gandlf_constructCSV", - "gandlf_collectStats", - "gandlf_patchMiner", - "gandlf_preprocess", - "gandlf_anonymizer", - "gandlf_verifyInstall", - "gandlf_configGenerator", - "gandlf_recoverConfig", - "gandlf_deploy", - "gandlf_optimizeModel", - "gandlf_generateMetrics", - ], classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", @@ -168,7 +133,7 @@ def run(self): long_description=readme, long_description_content_type="text/markdown", include_package_data=True, - package_data={"GANDLF": all_extra_files_pathcorrected}, + package_data={"GANDLF": extra_files}, keywords="semantic, segmentation, regression, classification, data-augmentation, medical-imaging, clinical-workflows, deep-learning, pytorch", zip_safe=False, ) diff --git a/testing/config_classification.yaml b/testing/config_classification.yaml index 0482a7371..b23ff66f3 100644 --- a/testing/config_classification.yaml +++ b/testing/config_classification.yaml @@ -55,7 +55,7 @@ save_output: false scaling_factor: 1 scheduler: triangle version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: True diff --git 
a/testing/config_regression.yaml b/testing/config_regression.yaml index 106caa969..b9c8dd764 100644 --- a/testing/config_regression.yaml +++ b/testing/config_regression.yaml @@ -38,7 +38,7 @@ save_output: false scaling_factor: 1 scheduler: triangle version: - maximum: 0.0.20 + maximum: 0.1.0-dev minimum: 0.0.14 weighted_loss: false diff --git a/testing/config_segmentation.yaml b/testing/config_segmentation.yaml index 3006e1eb2..3a5d48a7e 100644 --- a/testing/config_segmentation.yaml +++ b/testing/config_segmentation.yaml @@ -1,9 +1,9 @@ -# Choose the segmentation model here +# Choose the segmentation model here # options: unet, resunet, fcn version: { minimum: 0.0.14, - maximum: 0.0.20 + maximum: 0.1.0-dev } model: { @@ -12,7 +12,7 @@ model: architecture: resunet, # options: unet, resunet, fcn, uinc final_layer: sigmoid, # can be either sigmoid, softmax or none (none == regression) norm_type: instance, # can be either batch or instance - class_list: [0,255], # Set the list of labels the model should train on and predict + class_list: [0, 255], # Set the list of labels the model should train on and predict amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False # n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types } @@ -21,17 +21,12 @@ metrics: - precision - iou - f1 - - recall: { - average: macro, - } + - recall: { average: macro } verbose: True -inference_mechanism: { - grid_aggregator_overlap: average, - patch_overlap: 0, -} +inference_mechanism: { grid_aggregator_overlap: average, patch_overlap: 0 } modality: rad -# Patch size during training - 2D patch for breast images since third dimension is not patched -patch_size: [128,128] +# Patch size during training - 2D patch for breast images since third dimension is not patched +patch_size: [128, 128] # Number of epochs num_epochs: 1 patience: 1 @@ -51,58 +46,52 @@ optimizer: adam # the value of 'k' for cross-validation, this is the percentage of total training data to use as validation; # randomized split is performed using sklearn's KFold method # for single fold run, use '-' before the fold number -nested_training: - { +nested_training: { testing: -5, # this controls the holdout data splits for final model evaluation; use '1' if this is to be disabled - validation: -5 # this controls the validation data splits for model training + validation: -5, # this controls the validation data splits for model training } # various data augmentation techniques # options: affine, elastic, downsample, motion, ghosting, bias, blur, gaussianNoise, swap # keep/edit as needed # all transforms: https://torchio.readthedocs.io/transforms/transforms.html?highlight=transforms -data_augmentation: - { - # 'spatial':{ - # 'probability': 0.5 - # }, - # 'kspace':{ - # 'probability': 0.5 - # }, - # 'bias':{ - # 'probability': 0.5 - # }, - # 'blur':{ - # 'probability': 0.5 - # }, - # 'noise':{ - # 'probability': 0.5 - # }, - # 'swap':{ - # 'probability': 0.5 - # } - } -data_preprocessing: - { +data_augmentation: {} +# 'spatial':{ +# 'probability': 0.5 +# }, +# 'kspace':{ +# 'probability': 0.5 +# }, +# 'bias':{ +# 'probability': 0.5 +# }, +# 'blur':{ +# 'probability': 0.5 +# }, +# 'noise':{ +# 'probability': 0.5 +# }, +# 'swap':{ +# 'probability': 0.5 +# } +data_preprocessing: { # 'threshold':{ - # 'min': 10, + # 'min': 10, # 'max': 75 # }, # 'clip':{ - # 'min': 10, + # 'min': 10, # 'max': 75 - # }, - 'normalize', + # }, + "normalize", # 'resample':{ # 
'resolution': [1,2,3] # }, #'resize': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways } # data postprocessing node -data_postprocessing: - { - # 'largest_component', - # 'hole_filling' - } +data_postprocessing: {} +# 'largest_component', +# 'hole_filling' # parallel training on HPC - here goes the command to prepend to send to a high performance computing # cluster for parallel computing during multi-fold training # not used for single fold training diff --git a/testing/conftest.py b/testing/conftest.py index 31d8ceab0..36f666c37 100644 --- a/testing/conftest.py +++ b/testing/conftest.py @@ -1,4 +1,6 @@ import os, pathlib, pytest + +from click.testing import CliRunner from pytest import fixture from .test_full import ( @@ -18,6 +20,12 @@ def device(request): return request.config.getoption("--device") +# Fixture for Click's CliRunner to test Click commands +@pytest.fixture +def cli_runner(): + return CliRunner() + + @pytest.hookimpl(tryfirst=True, hookwrapper=True) def pytest_runtest_makereport(item, call): # execute all other hooks to obtain the report object diff --git a/testing/entrypoints/__init__.py b/testing/entrypoints/__init__.py new file mode 100644 index 000000000..68e088d75 --- /dev/null +++ b/testing/entrypoints/__init__.py @@ -0,0 +1,330 @@ +import importlib +from pathlib import Path +import pytest +from click import BaseCommand +from click.testing import CliRunner +from typing import Callable, Iterable, Any, Mapping, Optional, List, Union +import inspect +from dataclasses import dataclass +import sys +import shlex +import os +import shutil +from unittest.mock import patch, MagicMock + +from yaml.scanner import ScannerError + + +class ArgsExpander: + def __init__(self, orig_func: Callable): + self.orig_func = orig_func + + def normalize(self, args: Iterable[Any], kwargs: Mapping[str, Any]) -> dict: + """ + Say, we have the following function: + `def orig_func(param1: str, param2: str, train_flag=True)` + mocked up our orig function with replica. After test is executed we see replica was called with some + positional and some keyword args (say, it was `replica(foo, param2=bar)`). So, we take origin function signature + (keeping in mind params' default values) and join it with passed args. + + Args: + args (Iterable): the list of positioned args passed to the mock function (ex.: `["foo"]`) + kwargs (Mapping): dict of keyword args passed to the mock function (ex.: `{"param2": "bar"}`) + Returns: + dict: A full mapping of passed arguments, arg_name -> arg_value. Ex.: + ``` + { + "param1": "foo", + "param2": "bar", + "train_flag": True + } + ``` + """ + # Get parameter names from the original function + params = inspect.signature(self.orig_func).parameters + arg_names = list(params.keys()) + + # Build a dictionary of argument names to passed values + # Start with positional arguments + passed_args = {arg_names[i]: arg for i, arg in enumerate(args)} + + # Update the dictionary with keyword arguments + passed_args.update(kwargs) + + # For any missing arguments that have defaults, add those to the dictionary + for name, param in params.items(): + if name not in passed_args and param.default is not inspect.Parameter.empty: + passed_args[name] = param.default + + return passed_args + + +@dataclass +class CliCase: + """ + Represent a specific case. All passed new way lines as well as old way lines should finally have exactly the same + behavior and call a real logic function with `expected_args`. 
+ + Args: + should_succeed (bool): if that console command should succeed or fail + new_way_lines (List[str], optional): command lines of the following format (in reality are passed as args to `gandlf` cli subcommand): + '--input-dir input/ -c config.yaml -m rad --output-file output/' + old_way_lines (List[str], optional): list of str of the same format, but for old-fashioned format of cmd execution + (say, via `gandlf_patchMiner`): + '--inputDir input/ -c config.yaml -m rad --outputFile output/' + expected_args (dict): dict or params that should be finally passed to real logics code. + Required if `should_succeed`. + + """ + + should_succeed: bool = True + new_way_lines: list[str] = None + old_way_lines: list[str] = None + expected_args: dict = None + + +@dataclass +class _TmpPath: + path: str + + +# it's not a typo in class name - I want to keep the same name len for dir / file / na +# for config to be more readable (paths are aligned in one column then) +@dataclass +class TmpDire(_TmpPath): + pass + + +@dataclass +class TmpFile(_TmpPath): + content: Optional[Union[str, bytes]] = None + + +@dataclass +class TmpNoEx(_TmpPath): + pass + + +class TempFileSystem: + """ + Given a dict of path -> path description (dir / file with content / na), creates + the paths that are needed (dirs + files), and remove everything on the exit. + For `na` files ensures they do not exist. + + If any of given paths already present on file system, then raises an error. + + By default, creates requested structure right in working directory. + """ + + def __init__(self, config: list[_TmpPath], root_dir=None): + self.config = config + self.root_dir = root_dir + self.temp_paths: list[Path] = [] + + def __enter__(self): + try: + self.setup_file_system() + except Exception as e: + self.cleanup() + raise e + return self + + def setup_file_system(self): + for item in self.config: + # no tmp files should exist beforehand as we will clean everything on exit + path = Path(item.path) + if self.root_dir: + path = Path(self.root_dir) / path + if path.exists(): + raise FileExistsError( + path, + "For temp file system all paths must absent beforehand as we remove everything " + "at the end.", + ) + if isinstance(item, TmpDire): + path.mkdir(parents=True, exist_ok=False) + elif isinstance(item, TmpNoEx): + pass # we already ensured it not exists + elif isinstance(item, TmpFile): + path.parent.mkdir(parents=True, exist_ok=True) + if isinstance(item.content, bytes): + with open(path, "wb") as fin: + fin.write(item.content) + elif isinstance(item.content, str) or not item.content: + with open(path, "w") as fin: + if item.content: + fin.write(item.content) + else: + raise ValueError( + f"Given tmp file has an invalid content (should be str or bytes): {item}" + ) + + else: + raise ValueError(f"Given tmp file entity is of invalid type: {item}") + self.temp_paths.append(path) + + def cleanup(self): + for path in reversed(self.temp_paths): + if path.is_file(): + os.remove(path) + elif path.is_dir(): + shutil.rmtree(path) + elif not path.exists(): + pass + else: + raise ValueError( + f"wrong path {path}, not a dir, not a file. Cannot remove!" 
+ ) + + def __exit__(self, exc_type, exc_val, exc_tb): + self.cleanup() + + +def args_diff(expected_args: dict[str, Any], actual_args: dict[str, Any]) -> list[str]: + result = [] + for k in set(expected_args) | set(actual_args): + if ( + k not in expected_args + or k not in actual_args + or expected_args[k] != actual_args[k] + ): + result.append(k) + return result + + +def assert_called_properly( + mock_func: MagicMock, expected_args: dict, args_normalizer: ArgsExpander +) -> None: + """ + Check that mock_func was called exactly once and passed args are identical to expected_args. + Args: + mock_func (MagicMock): mock object that replaces a real code function. + expected_args (dict): a mapping of args that mock_func is expected to be called with + args_normalizer (ArgsExpander): wrapper around original function (mocked by mock_func), that can build a dict of + actual args passed basing on signature of origin function. + Returns: + None. If test fails, raises AssertionError + """ + mock_func.assert_called_once() + executed_call = mock_func.mock_calls[0] + actual_args = args_normalizer.normalize( + args=executed_call.args, kwargs=executed_call.kwargs + ) + orig_args = expected_args + expected_args = orig_args.copy() + for arg, val in orig_args.items(): + # if expected arg is `...` , then we do not care about its actual value + # just check the key presents in actual args + if val is Ellipsis: + assert arg in actual_args + expected_args[arg] = actual_args[arg] + + assert expected_args == actual_args, ( + f"Function was not called with the expected arguments: {expected_args=} vs {actual_args=}, " + f"diff {args_diff(expected_args, actual_args)}" + ) + + +def run_test_case( + cli_runner: Optional[CliRunner], + file_system_config: List[_TmpPath], + case: CliCase, + real_code_function_path: str, + new_way: Optional[BaseCommand], + old_way: Callable, + old_script_name: str, + patched_return_value: Any = None, +): + """ + Given a case (list of CLI lines), check if calling all these cli commands would lead to executing main code function + with the same expected args. + Args: + cli_runner (CliRunner): Click test runner. Is used to check new-way commands (via Click CLI tool) parse commands + properly. + file_system_config (list[_TmpPath]): describes a file/dir system required for the test case. The following + entities are supported: + - TmpFile: declares the file with given path should exist (and, optionally, filled with given content) + - TmpDire: declares the folder with given path should exist + - TmpNoEx: declares there should be no objects on given path. + case (CliCase): case to be tested. + real_code_function_path (str): path to the function that contains a real business code. + Ex.: `"GANDLF.entrypoints.anonymizer.run_anonymizer"`. This function would be mocked; test checks mock is + called with expected args. + new_way (click.BaseCommand): Click function that actually parses CLI args and call + `real_code_function_path` finally. May be absent if there is no `new_way_lines` in the case. + old_way (Callable): Python function that is used in gandlf_* entrypoints. Should parse its CLI + arguments via `argparse` and call `real_code_function_path` finally. May be absent if there is no + `old_way_lines` in the case. 
+ old_script_name (str): script name that old_way command should be run with via cli (one of `gandlf_*` entrypoint + names) + patched_return_value (Any, optional): Though normally CLI tools just parse args and run + `real_code_function_path`, sometimes its returned result is processed further. So, if given, mock would + return this value. + Returns: + None. Fails if one of tests is failed. + + """ + module_path, func_name = real_code_function_path.rsplit(".", 1) + module = importlib.import_module(module_path) + real_code_function = getattr(module, func_name) + + args_normalizer = ArgsExpander(real_code_function) + with patch( + real_code_function_path, return_value=patched_return_value + ) as mock_logic: + # tests that all click commands trigger execution with expected args + for new_line in case.new_way_lines or []: + try: + mock_logic.reset_mock() + new_cmd = shlex.split(new_line) + with TempFileSystem(file_system_config): + result = cli_runner.invoke(new_way, new_cmd) + if case.should_succeed: + assert result.exit_code == 0 + assert_called_properly( + mock_logic, case.expected_args, args_normalizer + ) + else: + assert result.exit_code != 0 + except BaseException: + print(f"Test failed on the new case: {new_line}") + print(f"Exception: {result.exception}") + print(f"Exc info: {result.exc_info}") + print(f"output: {result.output}") + raise + + # tests that old way commands via `gandlf_*` script trigger the same expected_args + for old_line in case.old_way_lines or []: + try: + mock_logic.reset_mock() + argv = [old_script_name] + shlex.split(old_line) + if case.should_succeed: + with patch.object(sys, "argv", argv), TempFileSystem( + file_system_config + ): + old_way() + assert_called_properly( + mock_logic, case.expected_args, args_normalizer + ) + else: + with ( + # here we can list possible errors that are allowed to be raised. + # Any other raised exception would be treated as test fail + pytest.raises( + ( + SystemExit, + AssertionError, + ScannerError, + IsADirectoryError, + FileNotFoundError, + ) + ) as e, + patch.object(sys, "argv", argv), + TempFileSystem(file_system_config), + ): + old_way() + if isinstance(e.value, SystemExit): + assert e.value.code != 0 + except BaseException: + print(f"Test failed on the old case: {old_line}") + raise diff --git a/testing/entrypoints/test_anonymizer.py b/testing/entrypoints/test_anonymizer.py new file mode 100644 index 000000000..99d060b1d --- /dev/null +++ b/testing/entrypoints/test_anonymizer.py @@ -0,0 +1,124 @@ +import os.path +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.anonymizer import new_way, old_way + +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.anonymizer.run_anonymizer" +OLD_SCRIPT_NAME = "gandlf_anonymizer" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("input/"), + TmpFile("config.yaml", content="foo: bar"), + TmpDire("output/"), + TmpNoEx("path_na/"), + TmpFile("output.csv", content="col1,col2\n123,456\n"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--input-dir . --config config.yaml --modality rad --output-file output/", + # tests short arg aliases + "-i . 
-c config.yaml -m rad -o output/", + # tests modality has default value + "-i . -c config.yaml -o output/", + ], + old_way_lines=[ + "--inputDir . --config config.yaml --modality rad --outputFile output/", + "-i . -c config.yaml -m rad -o output/", + "-i . -c config.yaml -o output/", + ], + expected_args={ + "input_path": os.path.normpath("."), + "output_path": os.path.normpath("output/"), + "parameters": {"foo": "bar"}, + "modality": "rad", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests that config is optional, and that output may not exist + "-i . -o path_na" + ], + old_way_lines=["-i . -o path_na"], + expected_args={ + "input_path": os.path.normpath("."), + "output_path": os.path.normpath("path_na"), + "parameters": None, + "modality": "rad", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests that output may be an existing file + "-i . -o output.csv" + ], + old_way_lines=["-i . -o output.csv"], + expected_args={ + "input_path": os.path.normpath("."), + "output_path": os.path.normpath("output.csv"), + "parameters": None, + "modality": "rad", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests that modality 'histo' is supported also + "-i . -m histo -o output.csv" + ], + old_way_lines=["-i . -m histo -o output.csv"], + expected_args={ + "input_path": os.path.normpath("."), + "output_path": os.path.normpath("output.csv"), + "parameters": None, + "modality": "histo", + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # tests that input should exist + "-i path_na -o output.csv", + # tests that input is required + "-o output.csv", + # tests that output is required + "-i .", + # tests that config file, if provided, should exist + "-i . -c path_na -o output.csv", + # tests that modality cannot take arbitrary values + "-i . -m fake_modality -o output.csv", + ], + old_way_lines=[ + # "-i path_na -o output.csv", # <- in old way input is not required to exist + "-o output.csv", + "-i .", + # "-i . -c path_na -o output.csv", # <- in old way if config file does not exist, it just skipped silently + # "-i . -m fake_modality -o output.csv", # <- in old way there is no such a validation in cli part + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_cli_tool.py b/testing/entrypoints/test_cli_tool.py new file mode 100644 index 000000000..8417d1f76 --- /dev/null +++ b/testing/entrypoints/test_cli_tool.py @@ -0,0 +1,10 @@ +from click.testing import CliRunner +from GANDLF.entrypoints.cli_tool import gandlf +from GANDLF.version import __version__ + + +def test_version_command(): + runner = CliRunner() + result = runner.invoke(gandlf, ["--version"]) + assert result.exit_code == 0 + assert __version__ in result.output diff --git a/testing/entrypoints/test_collect_stats.py b/testing/entrypoints/test_collect_stats.py new file mode 100644 index 000000000..35de03189 --- /dev/null +++ b/testing/entrypoints/test_collect_stats.py @@ -0,0 +1,117 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.collect_stats import new_way, old_way + +from . import CliCase, run_test_case, TmpNoEx, TmpDire, TmpFile + +# This function is a place where a real logic is executed. 
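+# (A rough sketch of the pattern `run_test_case` applies to every command line below,
+#  with illustrative arguments:
+#      with patch(MOCK_PATH) as mocked_logic:
+#          cli_runner.invoke(new_way, ["-m", "model_full/", "-o", "output/"])
+#          mocked_logic.assert_called_once()  # its args are then compared against `expected_args`
+#  )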
+# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.collect_stats._read_data_and_plot" +OLD_SCRIPT_NAME = "gandlf_collectStats" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_csv = "col1,col2\n1,100\n2,200" +test_file_system = [ + TmpDire("model_full/"), + TmpFile("model_full/logs_training.csv", content=test_csv), + TmpFile("model_full/logs_validation.csv", content=test_csv), + TmpFile("model_full/logs_testing.csv", content=test_csv), + TmpDire("model_no_test/"), + TmpFile("model_no_test/logs_training.csv", content=test_csv), + TmpFile("model_no_test/logs_validation.csv", content=test_csv), + TmpDire("model_empty/"), + TmpFile("file.txt", content="foobar"), + TmpDire("output/"), + TmpNoEx("output_na/"), + TmpNoEx("path_na/"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--model-dir model_full/ --output-dir output/", + # tests short arg aliases + "-m model_full -o output/", + ], + old_way_lines=[ + "--modeldir model_full/ --outputdir output/", + "-m model_full/ -o output/", + ], + expected_args={ + "training_logs_path": "model_full/logs_training.csv", + "validation_logs_path": "model_full/logs_validation.csv", + "testing_logs_path": "model_full/logs_testing.csv", + "output_plot_path": "output/plot.png", + "output_file": "output/data.csv", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # test that it works without testing log + "-m model_no_test -o output/" + ], + old_way_lines=["-m model_no_test/ -o output/"], + expected_args={ + "training_logs_path": "model_no_test/logs_training.csv", + "validation_logs_path": "model_no_test/logs_validation.csv", + "testing_logs_path": None, + "output_plot_path": "output/plot.png", + "output_file": "output/data.csv", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # test that output folder may not exist + "-m model_full -o output_na/" + ], + old_way_lines=["-m model_full/ -o output_na/"], + expected_args={ + "training_logs_path": "model_full/logs_training.csv", + "validation_logs_path": "model_full/logs_validation.csv", + "testing_logs_path": "model_full/logs_testing.csv", + "output_plot_path": "output_na/plot.png", + "output_file": "output_na/data.csv", + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # tests that input should exist + "-m path_na -o output/", + # tests that input is required + "-o output/", + # tests that output is required + "-m model_full", + # test that file is not accepted for input + "-m file.txt -o output/", + # test that file is not accepted for output + "-m model_full -o file.txt", + ], + old_way_lines=[ + # "-m path_na -o output/", # <- in old way model_dir is not required to exist + # "-o output/", # <- ... or even be passed (code would fail immediately on data reading instead) + # "-m model_full", # <- same with output (if no output provided, code would fail on path operations) + # "-m file.txt -o output/", # <- same. 
No restrictions on model path + # "-m model_full -o file.txt", # <- same + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_config_generator.py b/testing/entrypoints/test_config_generator.py new file mode 100644 index 000000000..e5c58c9d4 --- /dev/null +++ b/testing/entrypoints/test_config_generator.py @@ -0,0 +1,80 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.config_generator import new_way, old_way + +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.config_generator.config_generator" +OLD_SCRIPT_NAME = "gandlf_configGenerator" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpFile("config.yaml", content="foo: bar"), + TmpFile("strategy.yaml", content="baz: abc"), + TmpFile("output.csv", content="col1,col2\n123,456\n"), + TmpDire("output/"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--config config.yaml --strategy strategy.yaml --output output/", + # tests short arg aliases + "-c config.yaml -s strategy.yaml -o output/", + ], + old_way_lines=[ + "--config config.yaml --strategy strategy.yaml --output output/", + "-c config.yaml -s strategy.yaml -o output/", + ], + expected_args={ + "base_config_path": "config.yaml", + "strategy_path": "strategy.yaml", + "output_dir": "output/", + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # config should exist + "-c path_na -s strategy.yaml -o output/", + # strategy should exist + "-c config.yaml -s path_na -o output/", + # config is required + "-s strategy.yaml -o output/", + # strategy is required + "-c config.yaml -o output/", + # output is required + "-c config.yaml -s strategy.yaml", + # output should be a dir, not file + "-c config.yaml -s strategy.yaml -o output.csv", + ], + old_way_lines=[ + # "-c path_na -s strategy.yaml -o output/", # in old way we do not check file existence + # "-c config.yaml -s path_na -o output/", # same + "-s strategy.yaml -o output/", + "-c config.yaml -o output/", + "-c config.yaml -s strategy.yaml", + # "-c config.yaml -s strategy.yaml -o output.csv", # and do not check output is directory + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_construct_csv.py b/testing/entrypoints/test_construct_csv.py new file mode 100644 index 000000000..ee13a65ce --- /dev/null +++ b/testing/entrypoints/test_construct_csv.py @@ -0,0 +1,181 @@ +import os + +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.construct_csv import new_way, old_way + +from . 
import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.construct_csv.writeTrainingCSV" +OLD_SCRIPT_NAME = "gandlf_constructCSV" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("input/"), + TmpFile("channels_str.yaml", content="channels: _yaml1.gz,_yaml2.gz"), + TmpFile("channels_list.yaml", content="channels:\n - _yaml1.gz\n - _yaml2.gz"), + TmpFile( + "channels_labels.yaml", content="channels: _yaml1.gz,_yaml2.gz\nlabel: _yaml.gz" + ), + TmpFile("output.csv", content="foobar"), + TmpNoEx("output_na.csv"), + TmpDire("output/"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--input-dir input/ --channels-id _t1.nii.gz,_t2.nii.gz --label-id _seg.nii.gz --output-file output.csv --relativize-paths", + # tests short arg aliases + "-i input/ -c _t1.nii.gz,_t2.nii.gz -l _seg.nii.gz -o output.csv -r", + ], + old_way_lines=[ + "--inputDir input/ --channelsID _t1.nii.gz,_t2.nii.gz --labelID _seg.nii.gz --outputFile output.csv --relativizePaths True", + "-i input/ -c _t1.nii.gz,_t2.nii.gz -l _seg.nii.gz -o output.csv -r True", + ], + expected_args={ + "inputDir": os.path.normpath("input/"), + "channelsID": "_t1.nii.gz,_t2.nii.gz", + "labelID": "_seg.nii.gz", + "outputFile": os.path.normpath("output.csv"), + "relativizePathsToOutput": True, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # -r by default False + "-i input/ -c _t1.nii.gz,_t2.nii.gz -l _seg.nii.gz -o output.csv" + ], + old_way_lines=[ + "-i input/ -c _t1.nii.gz,_t2.nii.gz -l _seg.nii.gz -o output.csv -r False", + "-i input/ -c _t1.nii.gz,_t2.nii.gz -l _seg.nii.gz -o output.csv", + ], + expected_args={ + "inputDir": os.path.normpath("input/"), + "channelsID": "_t1.nii.gz,_t2.nii.gz", + "labelID": "_seg.nii.gz", + "outputFile": os.path.normpath("output.csv"), + "relativizePathsToOutput": False, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # channels may be read from yaml (str or list) + "-i input/ -c _yaml1.gz,_yaml2.gz -l _seg.nii.gz -o output.csv", + "-i input/ -c channels_str.yaml -l _seg.nii.gz -o output.csv", + "-i input/ -c channels_list.yaml -l _seg.nii.gz -o output.csv", + ], + old_way_lines=[ + "-i input/ -c _yaml1.gz,_yaml2.gz -l _seg.nii.gz -o output.csv", + "-i input/ -c channels_str.yaml -l _seg.nii.gz -o output.csv", + "-i input/ -c channels_list.yaml -l _seg.nii.gz -o output.csv", + ], + expected_args={ + "inputDir": os.path.normpath("input/"), + "channelsID": "_yaml1.gz,_yaml2.gz", + "labelID": "_seg.nii.gz", + "outputFile": os.path.normpath("output.csv"), + "relativizePathsToOutput": False, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # label-id can be defined in channels yaml also; arg value is skipped then + "-i input/ -c channels_labels.yaml -l _arg_no_use.gz -o output.csv", + "-i input/ -c channels_labels.yaml -o output.csv", + ], + old_way_lines=[ + "-i input/ -c channels_labels.yaml -l _arg_no_use.gz -o output.csv", + "-i input/ -c channels_labels.yaml -o output.csv", + ], + expected_args={ + "inputDir": os.path.normpath("input/"), + "channelsID": "_yaml1.gz,_yaml2.gz", + "labelID": "_yaml.gz", + "outputFile": os.path.normpath("output.csv"), + "relativizePathsToOutput": False, + }, + ), + 
CliCase( + should_succeed=True, + new_way_lines=[ + # label-id can be skipped totally + "-i input/ -c _yaml1.gz,_yaml2.gz -o output.csv", + "-i input/ -c channels_str.yaml -o output.csv", + "-i input/ -c channels_list.yaml -o output.csv", + ], + old_way_lines=[ + "-i input/ -c _yaml1.gz,_yaml2.gz -o output.csv", + "-i input/ -c channels_str.yaml -o output.csv", + "-i input/ -c channels_list.yaml -o output.csv", + ], + expected_args={ + "inputDir": os.path.normpath("input/"), + "channelsID": "_yaml1.gz,_yaml2.gz", + "labelID": None, + "outputFile": os.path.normpath("output.csv"), + "relativizePathsToOutput": False, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # output may not exist + "-i input/ -c _t1.nii.gz,_t2.nii.gz -o output_na.csv" + ], + old_way_lines=["-i input/ -c _t1.nii.gz,_t2.nii.gz -o output_na.csv"], + expected_args={ + "inputDir": os.path.normpath("input/"), + "channelsID": "_t1.nii.gz,_t2.nii.gz", + "labelID": None, + "outputFile": os.path.normpath("output_na.csv"), + "relativizePathsToOutput": False, + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # input should be passed & exist + "-i path_na -c _t1.nii.gz,_t2.nii.gz -o output.csv", + "-c channels_str.yaml -o output.csv", + # channel should be passed; file may not exist, value is treated as list of suffixes + "-i input/ -o output.csv", + # output should be passed and should not point to existing dir (file only is supported) + "-i input/ -c _t1.nii.gz,_t2.nii.gz", + "-i input/ -c _t1.nii.gz,_t2.nii.gz -o output/", + ], + old_way_lines=[ + # input should be passed & exist + # "-i path_na -c _t1.nii.gz,_t2.nii.gz -o output.csv", # no checks for existence in old way + "-c channels_str.yaml -o output.csv", + # channel should be passed + "-i input/ -o output.csv", + # output should be passed + "-i input/ -c _t1.nii.gz,_t2.nii.gz", + # "-i input/ -c _t1.nii.gz,_t2.nii.gz -o output/", # no checks for file/dir in old way + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_debug_info.py b/testing/entrypoints/test_debug_info.py new file mode 100644 index 000000000..9cf12bed0 --- /dev/null +++ b/testing/entrypoints/test_debug_info.py @@ -0,0 +1,34 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.debug_info import new_way, old_way + +from . import CliCase, run_test_case + +# This function is a place where a real logic is executed. 
+# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.debug_info._debug_info" +OLD_SCRIPT_NAME = "gandlf_debugInfo" + + +# subcommand is trivial, we just check both new_way and old_way run successfully +test_file_system = [] +test_cases = [ + CliCase( + should_succeed=True, new_way_lines=[""], old_way_lines=[""], expected_args={} + ) +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_deploy.py b/testing/entrypoints/test_deploy.py new file mode 100644 index 000000000..dd8d18f6f --- /dev/null +++ b/testing/entrypoints/test_deploy.py @@ -0,0 +1,334 @@ +import pickle # nosec B403 + +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.deploy import new_way, old_way + +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.deploy.run_deployment" +OLD_SCRIPT_NAME = "gandlf_deploy" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("model/"), + TmpFile("model/parameters.pkl", content=pickle.dumps({"foo": "bar"})), + TmpFile("model.file", content="123321"), + TmpFile("config.yaml", content="baz: abc"), + TmpDire("config_folder/"), + TmpDire("mlcube_root/"), + TmpFile("mlcube_root/mlcube.yaml"), + TmpFile("tmp_test_entrypoint.py", content="print('Hello GaNDLF!')"), + TmpFile("output.csv", content="foobar"), + TmpNoEx("output_na.csv"), + TmpDire("output/"), + TmpNoEx("output_na/"), + TmpNoEx("path_na"), +] +test_cases = [ + # ======================= + # Full command + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--model model/ --config config.yaml --target docker --mlcube-type model " + + "--mlcube-root mlcube_root/ --output-dir output/ " + + "--requires-gpu --entrypoint tmp_test_entrypoint.py", + # tests short arg aliases + "-m model/ -c config.yaml -t docker --mlcube-type model " + + "-r mlcube_root/ -o output/ " + + "-g -e tmp_test_entrypoint.py", + # tests requires-gpu is True by default if not passed + "-m model/ -c config.yaml -t docker --mlcube-type model " + + "-r mlcube_root/ -o output/ " + + "-e tmp_test_entrypoint.py", + ], + old_way_lines=[ + # full command + "--model model/ --config config.yaml --target docker --mlcube-type model " + + "--mlcube-root mlcube_root/ --outputdir output/ " + + "--requires-gpu True --entrypoint tmp_test_entrypoint.py", + # tests short arg aliases + "-m model/ -c config.yaml -t docker --mlcube-type model " + + "-r mlcube_root/ -o output/ " + + "-g True -e tmp_test_entrypoint.py", + # tests requires-gpu is True by default if not passed + "-m model/ -c config.yaml -t docker --mlcube-type model " + + "-r mlcube_root/ -o output/ " + + "-e tmp_test_entrypoint.py", + ], + expected_args={ + "mlcubedir": "mlcube_root/", + "outputdir": "output/", + "target": "docker", + "mlcube_type": "model", + "entrypoint_script": "tmp_test_entrypoint.py", + "configfile": "config.yaml", + "modeldir": "model/", + 
"requires_gpu": True, + }, + ), + # ================= + # model-type checks + CliCase( + should_succeed=False, + new_way_lines=[ + # model_type is required and does not accept random values + "-m model/ -c config.yaml -t docker -r mlcube_root/ -o output/", + "--model-type random_type -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/", + ], + old_way_lines=[ + "-m model/ -c config.yaml -t docker -r mlcube_root/ -o output/", + "--model-type random_type -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/", + ], + ), + # ================== + # Model MLCube + CliCase( + should_succeed=False, + new_way_lines=[ + # for model_type=model everything except entrypoint and config is required + "--mlcube-type model -c config.yaml -t docker -r mlcube_root/ -o output/", + "--mlcube-type model -m model/ -c config.yaml -r mlcube_root/ -o output/", + "--mlcube-type model -m model/ -c config.yaml -t docker -o output/", + "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/", + # also model should point to existing folder + "--mlcube-type model -m model.file -c config.yaml -t docker -r mlcube_root/ -o output/", + "--mlcube-type model -m path_na -c config.yaml -t docker -r mlcube_root/ -o output/", + # config if passed should point to file, not to folder + "--mlcube-type model -m model/ -c path_na -t docker -r mlcube_root/ --o output/", + "--mlcube-type model -m model/ -c config_folder/ -t docker -r mlcube_root/ -o output/", + # the only supported target is docker, no random values + "--mlcube-type model -m model/ -c config_folder/ -t stevedore -r mlcube_root/ -o output/", + # model_root should point to existing folder + "--mlcube-type model -m model/ -c config.yaml -t docker -r path_na -o output/", + "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/mlcube.yaml -o output/", + # output should point to a folder or to a non-existent path + "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/ -o output.csv", + # entrypoint, if passed, should point to existing file + "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/ -e path_na", + "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/ -e empty_folder/", + ], + old_way_lines=[ + # for model_type=model everything except config and entrypoint is required + "--mlcube-type model -c config.yaml -t docker -r mlcube_root/ -o output/", + "--mlcube-type model -m model/ -c config.yaml -r mlcube_root/ -o output/", + "--mlcube-type model -m model/ -c config.yaml -t docker -o output/", + "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/", + # also model should point to existing folder + # vvv---- in old way we do not check that model is dir (such a check happens later) ----vvv + # "--mlcube-type model -m model.file -c config.yaml -t docker -r mlcube_root/ -o output/", + # vvv---- Also we do not check model path existence (such a check happens later) ----vvv + # "--mlcube-type model -m path_na -c config.yaml -t docker -r mlcube_root/ -o output/", + # config if passed should point to file, not to folder + # vvv---- in old way we don't check file existence ----vvv + # "--mlcube-type model -m model/ -c path_na -t docker -r mlcube_root/ --o output/", + # vvv---- as well as that config is file ----vvv + # "--mlcube-type model -m model/ -c config_folder/ -t docker -r mlcube_root/ -o output/", + # the only supported target is docker, no random values + # vvv---- no such a check in old_way ----vvv + # "--mlcube-type model -m 
model/ -c config_folder/ -t stevedore -r mlcube_root/ -o output/", + # model_root should point to existing folder + # vvv---- no check for root existence in old_way (it happens later) ----vvv + # "--mlcube-type model -m model/ -c config.yaml -t docker -r path_na -o output/", + # vvv---- no check root is dir in old_way (it happens later) ----vvv + # "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/mlcube.yaml -o output/", + # output should point to a folder or to non-existent path + # vvv---- despite this command fails, it fails when we try to create a file + # under output "folder" (while a real check that output is not a file happens later). + # Thus, as there is no real explicit check in old_way, this test is disabled + # ----vvv + # "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/ -o output.csv", + # entrypoint if passed should point to existing file + # vvv---- no check entrypoint exists in old_way (it happens later) ----vvv + # "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/ -e path_na", + # vvv---- no such a check in old_way ----vvv + # "--mlcube-type model -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/ -e empty_folder/", + ], + ), + CliCase( # Model + entrypoint + should_succeed=True, + new_way_lines=[ + # for model_type=model entrypoint is optional + "-m model/ -c config.yaml -t docker --mlcube-type model -r mlcube_root/ -o output/" + ], + old_way_lines=[ + "-m model/ -c config.yaml -t docker --mlcube-type model -r mlcube_root/ -o output/" + ], + expected_args={ + "mlcubedir": "mlcube_root/", + "outputdir": "output/", + "target": "docker", + "mlcube_type": "model", + "entrypoint_script": None, + "configfile": "config.yaml", + "modeldir": "model/", + "requires_gpu": True, + }, + ), + CliCase( # Model + config + should_succeed=True, + new_way_lines=[ + # for model_type=model config may be skipped; is restored from model then (`parameters.pkl`) + "-m model/ -t docker --mlcube-type model -r mlcube_root/ -o output/" + ], + old_way_lines=[ + "-m model/ -t docker --mlcube-type model -r mlcube_root/ -o output/" + ], + expected_args={ + "mlcubedir": "mlcube_root/", + "outputdir": "output/", + "target": "docker", + "mlcube_type": "model", + "entrypoint_script": None, + "configfile": "output/original_config.yml", + "modeldir": "model/", + "requires_gpu": True, + }, + ), + # ================ + # Metrics MLCube + CliCase( + should_succeed=True, + new_way_lines=[ + # for model_type=metrics, model, config and entrypoint may be skipped + "--mlcube-type metrics -t docker -r mlcube_root/ -o output/" + ], + old_way_lines=[ + # for model_type=metrics, model, config and entrypoint may be skipped + "--mlcube-type metrics -t docker -r mlcube_root/ -o output/" + ], + expected_args={ + "mlcubedir": "mlcube_root/", + "outputdir": "output/", + "target": "docker", + "mlcube_type": "metrics", + "entrypoint_script": None, + "configfile": None, + "modeldir": None, + "requires_gpu": True, + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # for model_type=metrics, target, mlcube_root and output are required + "--mlcube-type metrics -m model/ -c config.yaml -r mlcube_root/ -o output/", + "--mlcube-type metrics -m model/ -c config.yaml -t docker -o output/", + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/", + # model if passed should point to existing folder + "--mlcube-type metrics -m model.file -c config.yaml -t docker -r mlcube_root/ -o output/", + "--mlcube-type metrics -m 
path_na -c config.yaml -t docker -r mlcube_root/ -o output/", + # config if passed should point to file, not to folder + "--mlcube-type metrics -m model/ -c path_na -t docker -r mlcube_root/ --o output/", + "--mlcube-type metrics -m model/ -c config_folder/ -t docker -r mlcube_root/ -o output/", + # the only supported target is docker, no random values + "--mlcube-type metrics -m model/ -c config_folder/ -t stevedore -r mlcube_root/ -o output/", + # model_root should point to existing folder + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r path_na -o output/", + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/mlcube.yaml -o output/", + # output should point to a folder or to a non-existent path, not to a file + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/ -o output.csv", + # entrypoint, if passed, should point to existing file + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/ -e path_na", + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/ --o output/ -e empty_folder/", + ], + old_way_lines=[ + # for model_type=metrics, target, mlcube_root and output are required + "--mlcube-type metrics -m model/ -c config.yaml -r mlcube_root/ -o output/", + "--mlcube-type metrics -m model/ -c config.yaml -t docker -o output/", + "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/", + # also model should point to existing folder + # vvv---- in old way we do not check if model is dir (such a check happens later) ----vvv + # "--mlcube-type metrics -m model.file -c config.yaml -t docker -r mlcube_root/ -o output/", + # vvv---- Also we do not check model path existence (such a check happens later) ----vvv + # "--mlcube-type metrics -m path_na -c config.yaml -t docker -r mlcube_root/ -o output/", + # config if passed should point to file, not to folder + # vvv---- in old way we don't check file existence ----vvv + # "--mlcube-type metrics -m model/ -c path_na -t docker -r mlcube_root/ --o output/", + # vvv---- as well as that config is file ----vvv + # "--mlcube-type metrics -m model/ -c config_folder/ -t docker -r mlcube_root/ -o output/", + # the only supported target is docker, no random values + # vvv---- no such a check in old_way ----vvv + # "--mlcube-type metrics -m model/ -c config_folder/ -t stevedore -r mlcube_root/ -o output/", + # model_root should point to existing folder + # vvv---- no check for root existence in old_way (it happens later) ----vvv + # "--mlcube-type metrics -m model/ -c config.yaml -t docker -r path_na -o output/", + # vvv---- no check root is dir in old_way (it happens later) ----vvv + # "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/mlcube.yaml -o output/", + # output should point to a folder or to a non-existent path + # vvv---- despite this command fails, it fails when we try to create a file + # under output "folder" (while a real check that output is not a file happens later). 
+ # Thus, as there is no real explicit check in old_way, this test is disabled + # ----vvv + # "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/ -o output.csv", + # entrypoint, if passed, should point to existing file + # vvv---- no check entrypoint exists in old_way (it happens later) ----vvv + # "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/ -o output/ -e path_na", + # vvv---- no such a check in old_way ----vvv + # "--mlcube-type metrics -m model/ -c config.yaml -t docker -r mlcube_root/ --o output/ -e empty_folder/", + ], + ), + # =============== + # Other options: requires_gpu + CliCase( + should_succeed=True, + new_way_lines=[ + # gpu may be disabled by passing --no-gpu + "-m model/ -c config.yaml -t docker --mlcube-type model -r mlcube_root/ -o output/ --no-gpu" + ], + old_way_lines=[ + "-m model/ -c config.yaml -t docker --mlcube-type model -r mlcube_root/ -o output/ -g False" + ], + expected_args={ + "mlcubedir": "mlcube_root/", + "outputdir": "output/", + "target": "docker", + "mlcube_type": "model", + "entrypoint_script": None, + "configfile": "config.yaml", + "modeldir": "model/", + "requires_gpu": False, + }, + ), + CliCase( # output folder may not exist (would be created) + should_succeed=True, + new_way_lines=[ + # gpu may be disabled by passing --no-gpu + "-m model/ -c config.yaml -t docker --mlcube-type model -r mlcube_root/ -o output_na/" + ], + old_way_lines=[ + "-m model/ -c config.yaml -t docker --mlcube-type model -r mlcube_root/ -o output_na/" + ], + expected_args={ + "mlcubedir": "mlcube_root/", + "outputdir": "output_na/", + "target": "docker", + "mlcube_type": "model", + "entrypoint_script": None, + "configfile": "config.yaml", + "modeldir": "model/", + "requires_gpu": True, + }, + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + patched_return_value=True, + ) diff --git a/testing/entrypoints/test_entrypoints_existence.py b/testing/entrypoints/test_entrypoints_existence.py new file mode 100644 index 000000000..926431381 --- /dev/null +++ b/testing/entrypoints/test_entrypoints_existence.py @@ -0,0 +1,46 @@ +import shlex +import subprocess # nosec B404 +import pytest +from GANDLF.entrypoints.subcommands import cli_subcommands as gandlf_commands + +old_way_entrypoints = [ + # old-way entrypoints + "gandlf_anonymizer --help", + "gandlf_collectStats --help", + "gandlf_configGenerator --help", + "gandlf_constructCSV --help", + "gandlf_debugInfo --help", + "gandlf_deploy --help", + "gandlf_generateMetrics --help", + "gandlf_optimizeModel --help", + "gandlf_patchMiner --help", + "gandlf_preprocess --help", + "gandlf_recoverConfig --help", + "gandlf_run --help", + "gandlf_verifyInstall --help", + "gandlf_splitCSV --help", +] + +main_cli_command = ["gandlf --version"] +# new-way CLI subcommands +new_way_cli_commands = [f"gandlf {cmd} --help" for cmd in gandlf_commands.keys()] + +# Combine static and dynamic commands +all_commands = old_way_entrypoints + main_cli_command + new_way_cli_commands + + +@pytest.mark.parametrize("command", all_commands) +def test_command_execution(command): + print(f"Running '{command}'...") + # Run the command and capture output, stderr, and exit status + command_split = shlex.split(command) + result = subprocess.run( # nosec B603 + 
command_split, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + assert ( + result.returncode == 0 + ), f"Command '{command}' failed with output:\n{result.stdout}" diff --git a/testing/entrypoints/test_generate_metrics.py b/testing/entrypoints/test_generate_metrics.py new file mode 100644 index 000000000..662d8cb98 --- /dev/null +++ b/testing/entrypoints/test_generate_metrics.py @@ -0,0 +1,116 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.generate_metrics import new_way, old_way +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.generate_metrics.generate_metrics_dict" +OLD_SCRIPT_NAME = "gandlf_generateMetrics" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("tmp_dir/"), + TmpFile("input.csv", content="SubjectID,Target,Prediction\n1,1.0,1.5\n2,0.5,0.3"), + TmpFile("config.yaml", content="foo: bar"), + TmpFile("output.json"), + TmpNoEx("output_na.csv"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--input-data input.csv --output-file output.json --config config.yaml --missing-prediction 666", + # tests short arg aliases + "-i input.csv -o output.json -c config.yaml -m 666", + # --raw-input param exists that do nothing + "-i input.csv -o output.json -c config.yaml --raw-input 123321 -m 666", + ], + old_way_lines=[ + "--inputdata input.csv --outputfile output.json --config config.yaml --missingprediction 666", + "--data_path input.csv --output_path output.json --parameters_file config.yaml --missingprediction 666", + "-i input.csv -o output.json -c config.yaml -m 666", + # --raw-input param exists that do nothing + "-i input.csv -o output.json -c config.yaml --rawinput 123321 -m 666", + "-i input.csv -o output.json -c config.yaml -rawinput 123321 -m 666", + ], + expected_args={ + "input_csv": "input.csv", + "config": "config.yaml", + "outputfile": "output.json", + "missing_prediction": 666, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # output is optional + "-i input.csv -c config.yaml" + ], + old_way_lines=["-i input.csv -c config.yaml"], + expected_args={ + "input_csv": "input.csv", + "config": "config.yaml", + "outputfile": None, + "missing_prediction": -1, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # output may not exist yet + "-i input.csv -o output_na.json -c config.yaml" + ], + old_way_lines=["-i input.csv -o output_na.json -c config.yaml"], + expected_args={ + "input_csv": "input.csv", + "config": "config.yaml", + "outputfile": "output_na.json", + "missing_prediction": -1, + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # input and config are required + "-o output.json -c config.yaml", + "-i input.csv -o output.json", + # input, config should point to existing file, not dir + # "-i tmp_dir/ -o output.json -c config.yaml", + "-i input.csv -o output.json -c path_na", + "-i input.csv -o output.json -c tmp_dir/", + # output if passed should not point to dir + "-i input.csv -o tmp_dir/ -c config.yaml", + ], + old_way_lines=[ + # input and config are required + "-o output.json -c config.yaml", + "-i input.csv -o output.json", + # input, config should point to existing file, 
not dir + # "-i path_na -o output.json -c config.yaml", # no check in old_way + # "-i tmp_dir/ -o output.json -c config.yaml", # no check in old_way + # "-i input.csv -o output.json -c path_na", # no check in old_way + # "-i input.csv -o output.json -c tmp_dir/", # no check in old_way + # output if passed should not point to dir + # "-i input.csv -o tmp_dir/ -c config.yaml", # no check in old_way + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_optimize_model.py b/testing/entrypoints/test_optimize_model.py new file mode 100644 index 000000000..f2002c72e --- /dev/null +++ b/testing/entrypoints/test_optimize_model.py @@ -0,0 +1,109 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.optimize_model import new_way, old_way +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.optimize_model.post_training_model_optimization" +OLD_SCRIPT_NAME = "gandlf_optimizeModel" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("tmp_dir/"), + TmpFile("model.pth.tar", content="123321"), + TmpFile("config.yaml", content="foo: bar"), + TmpNoEx("path_na"), + TmpDire("output/"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command with output + "--model model.pth.tar --config config.yaml --output-path output/", + # tests short arg aliases + "-m model.pth.tar -c config.yaml -o output/", + ], + old_way_lines=[ + "--model model.pth.tar --config config.yaml --output_path output/", + "-m model.pth.tar -c config.yaml -o output/", + ], + expected_args={ + "model_path": "model.pth.tar", + "config_path": "config.yaml", + "output_path": "output/", + "output_dir": None, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--model model.pth.tar --config config.yaml", + # tests short arg aliases + "-m model.pth.tar -c config.yaml", + ], + old_way_lines=[ + "--model model.pth.tar --config config.yaml", + "-m model.pth.tar -c config.yaml", + ], + expected_args={ + "model_path": "model.pth.tar", + "config_path": "config.yaml", + "output_dir": None, + "output_path": None, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # config is optional + "-m model.pth.tar" + ], + old_way_lines=["-m model.pth.tar"], + expected_args={ + "model_path": "model.pth.tar", + "config_path": None, + "output_path": None, + "output_dir": None, + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # model is required + "-c config.yaml", + # input, config should point to existing file, not dir + "-m path_na -c config.yaml", + "-m tmp_dir/ -c config.yaml", + "-m model.pth.tar -c path_na", + "-m model.pth.tar -c tmp_dir/", + ], + old_way_lines=[ + # model is required + "-c config.yaml", + # input, config should point to existing file, not dir + # "-m path_na -c config.yaml", # no check in old way + # "-m tmp_dir/ -c config.yaml", # no check in old way + # "-m model.pth.tar -c path_na", # no check in old way + # "-m 
model.pth.tar -c tmp_dir/", # no check in old way + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_patch_miner.py b/testing/entrypoints/test_patch_miner.py new file mode 100644 index 000000000..cbb2f56a4 --- /dev/null +++ b/testing/entrypoints/test_patch_miner.py @@ -0,0 +1,110 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.patch_miner import new_way, old_way +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.patch_miner.patch_extraction" +OLD_SCRIPT_NAME = "gandlf_patchMiner" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("tmp_dir/"), + TmpFile("input.csv", content="SubjectID,Target,Prediction\n1,1.0,1.5\n2,0.5,0.3"), + TmpFile("config.yaml", content="foo: bar"), + TmpDire("output/"), + TmpFile("output.csv"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--input-csv input.csv --output-dir output/ --config config.yaml", + # tests short arg aliases + "-i input.csv -o output/ -c config.yaml", + ], + old_way_lines=[ + "--input_CSV input.csv --output_path output/ --config config.yaml", + "-i input.csv -o output/ -c config.yaml", + ], + expected_args={ + "input_path": "input.csv", + "config": "config.yaml", + "output_path": "output/", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # config is optional + "-i input.csv -o output/" + ], + old_way_lines=["-i input.csv -o output/"], + expected_args={ + "input_path": "input.csv", + "config": None, + "output_path": "output/", + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # output may not exist yet + "-i input.csv -o output_na/" + ], + old_way_lines=["-i input.csv -o output_na/"], + expected_args={ + "input_path": "input.csv", + "config": None, + "output_path": "output_na/", + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # input and output are required + "-o output/", + "-i input.csv", + # input should point to existing file, not dir + "-i path_na -o output/ -c config.yaml", + "-i tmp_dir/ -o output/ -c config.yaml", + # config if passed should point to existing file, not dir + "-i input.csv -o output/ -c path_na", + "-i input.csv -o output/ -c tmp_dir/", + # output should point to dir, not file + "-i input.csv -o output.csv -c config.yaml", + ], + old_way_lines=[ + # input and output are required + "-o output/", + "-i input.csv", + # input should point to existing file, not dir + # "-i path_na -o output/ -c config.yaml", # no check in old_way + # "-i tmp_dir/ -o output/ -c config.yaml", # no check in old_way + # config if passed should point to existing file, not dir + # "-i input.csv -o output/ -c path_na", # no check in old_way + # "-i input.csv -o output/ -c tmp_dir/", # no check in old_way + # output should point to dir, not file + # "-i input.csv -o output.csv -c config.yaml", # no check in old_way + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def 
test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_preprocess.py b/testing/entrypoints/test_preprocess.py new file mode 100644 index 000000000..77397e060 --- /dev/null +++ b/testing/entrypoints/test_preprocess.py @@ -0,0 +1,145 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.preprocess import new_way, old_way +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.preprocess.preprocess_and_save" +OLD_SCRIPT_NAME = "gandlf_preprocess" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("tmp_dir/"), + TmpFile("input.csv", content="SubjectID,Target,Prediction\n1,1.0,1.5\n2,0.5,0.3"), + TmpFile("config.yaml", content="foo: bar"), + TmpDire("output/"), + TmpFile("output.csv"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--config config.yaml --input-data input.csv --output-dir output/ --label-pad constant --apply-augs --crop-zero", + # tests short arg aliases + "-c config.yaml -i input.csv -o output/ -l constant -a -z", + # checks --label-pad is optional with `constant` default value + "-c config.yaml -i input.csv -o output/ -a -z", + ], + old_way_lines=[ + "--config config.yaml --inputdata input.csv --output output/ --labelPad constant --applyaugs True --cropzero True", + "-c config.yaml -i input.csv -o output/ -l constant -a True -z True", + "-c config.yaml -i input.csv -o output/ -a True -z True", + ], + expected_args={ + "config_file": "config.yaml", + "data_csv": "input.csv", + "output_dir": "output/", + "label_pad_mode": "constant", + "applyaugs": True, + "apply_zero_crop": True, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests flags (--apply-augs, --crop-zero) + "-c config.yaml -i input.csv -o output/" + ], + old_way_lines=[ + "-c config.yaml -i input.csv -o output/", + # vvv--- don't work as any passed value is transformed to `True` + # "-c config.yaml -i input.csv -o output/ -a False -z False", + # "-c config.yaml -i input.csv -o output/ -a False -z False", + ], + expected_args={ + "config_file": "config.yaml", + "data_csv": "input.csv", + "output_dir": "output/", + "label_pad_mode": "constant", + "applyaugs": False, + "apply_zero_crop": False, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests --label-pad + "-c config.yaml -i input.csv -o output/ -l mean" + ], + old_way_lines=["-c config.yaml -i input.csv -o output/ -l mean"], + expected_args={ + "config_file": "config.yaml", + "data_csv": "input.csv", + "output_dir": "output/", + "label_pad_mode": "mean", + "applyaugs": False, + "apply_zero_crop": False, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # output may not exist yet + "-i input.csv -o output_na/ -c config.yaml" + ], + old_way_lines=["-i input.csv -o output_na/ -c config.yaml"], + expected_args={ + "config_file": "config.yaml", + "data_csv": "input.csv", + "output_dir": "output_na/", + "label_pad_mode": "constant", + "applyaugs": False, + "apply_zero_crop": False, 
+ }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # input, output and config are required + "-o output/ -c config.yaml", + "-i input.csv -c config.yaml", + "-i input.csv -o output/", + # input should point to existing file, not dir + "-i path_na -o output/ -c config.yaml", + "-i tmp_dir/ -o output/ -c config.yaml", + # config should point to existing file, not dir + "-i input.csv -o output/ -c path_na", + "-i input.csv -o output/ -c tmp_dir/", + # output should point to dir, not file + "-i input.csv -o output.csv -c config.yaml", + ], + old_way_lines=[ + # input, output and config are required + "-o output/ -c config.yaml", + "-i input.csv -c config.yaml", + "-i input.csv -o output/", + # input should point to existing file, not dir + # "-i path_na -o output/ -c config.yaml", # no check in old way + # "-i tmp_dir/ -o output/ -c config.yaml", # no check in old way + # config should point to existing file, not dir + # "-i input.csv -o output/ -c path_na", # no check in old way + # "-i input.csv -o output/ -c tmp_dir/", # no check in old way + # output should point to dir, not file + # "-i input.csv -o output.csv -c config.yaml", # no check in old way + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_recover_config.py b/testing/entrypoints/test_recover_config.py new file mode 100644 index 000000000..8ca6029a6 --- /dev/null +++ b/testing/entrypoints/test_recover_config.py @@ -0,0 +1,109 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.recover_config import new_way, old_way +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. 
+# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.recover_config.recover_config" +OLD_SCRIPT_NAME = "gandlf_preprocess" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("model/"), + TmpFile("model.file"), + TmpFile("output.yaml"), + TmpDire("output/"), + TmpNoEx("output_na.yaml"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + # also checks --mlcube is optional + "--model-dir model/ --output-file output.yaml", + # tests short arg aliases + "-m model/ -o output.yaml", + ], + old_way_lines=[ + "--modeldir model/ --outputFile output.yaml", + "-m model/ -o output.yaml", + ], + expected_args={"modelDir": "model/", "outputFile": "output.yaml"}, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # mlcube way + "--mlcube --output-file output.yaml", + # tests short arg aliases + "-c -o output.yaml", + ], + old_way_lines=[ + # same as for new way + "--mlcube true -o output.yaml", + "-c true -o output.yaml", + ], + expected_args={"modelDir": "/embedded_model/", "outputFile": "output.yaml"}, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests model is ignored when mlcube is passed + "-m model/ -c -o output.yaml" + ], + old_way_lines=["-m model/ -c true -o output.yaml"], + expected_args={"modelDir": "/embedded_model/", "outputFile": "output.yaml"}, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # tests output may not exist + "-m model/ -o output_na.yaml" + ], + old_way_lines=["-m model/ -o output_na.yaml"], + expected_args={"modelDir": "model/", "outputFile": "output_na.yaml"}, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # output is required + "-m model/", + "-c", + # model if passed should point to existing dir + "-m path_na -o output.yaml", + "-m model.file -o output.yaml", + # output should point to file, not dir + "-m model/ -o output/", + ], + old_way_lines=[ + # output is required + "-m model/", # no check in old way + "-c", # no check in old way + # model if passed should point to existing dir + # "-m path_na -o output.yaml", # no check in old way + # "-m model.file -o output.yaml", # no check in old way + # output should point to file, not dir + # "-m model/ -o output/", # no check in old way + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + patched_return_value=True, + ) diff --git a/testing/entrypoints/test_run.py b/testing/entrypoints/test_run.py new file mode 100644 index 000000000..c8fdede81 --- /dev/null +++ b/testing/entrypoints/test_run.py @@ -0,0 +1,274 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.run import new_way, old_way +from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx + +# This function is a place where a real logic is executed. 
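+# (Note: in the `expected_args` below, `...` (Ellipsis) is a "don't care" wildcard -
+#  `assert_called_properly` in the shared helpers only checks that the key is present.)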
+# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.run.main_run" +OLD_SCRIPT_NAME = "gandlf_patchMiner" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +csv_content = "SubjectID,Target,Prediction\n1,1.0,1.5\n2,0.5,0.3" +test_file_system = [ + TmpFile("input.csv", content=csv_content), + TmpFile("train.csv", content=csv_content), + TmpFile("val.csv", content=csv_content), + TmpDire("input/"), + TmpFile("input/data.csv", content=csv_content), + TmpFile("config.yaml", content="foo: bar"), + TmpDire("config_dir/"), + TmpDire("model/"), + TmpFile("model.file"), + TmpDire("output/"), + TmpFile("output.csv"), + TmpNoEx("output_na/"), + TmpNoEx("path_na"), +] +# No tests for weird combinations: train + output-path, inference + reset/resume, as behavior is undefined +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command except --resume, --output-path + "--config config.yaml --input-data input.csv --train --model-dir model/ " + + "--device cuda --reset", + # tests short arg aliases + "-c config.yaml -i input.csv -t -m model/ -d cuda -rt", + # test presence of --raw-input (and its uselessness) + "-c config.yaml -i input.csv -t -m model/ -d cuda -rt --raw-input blabla", + ], + old_way_lines=[ + "--config config.yaml --inputdata input.csv --train True --modeldir model/ --device cuda --reset True", + "--parameters_file config.yaml --data_path input.csv --train True --modeldir model/ --device cuda --reset True", + "-c config.yaml -i input.csv -t True -m model/ -d cuda -rt True", + # test presence of --raw-input (and its uselessness) + "-c config.yaml -i input.csv -t True -m model/ -d cuda -rt True --rawinput blabla", + "-c config.yaml -i input.csv -t True -m model/ -d cuda -rt True -rawinput blabla", + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": True, + "device": "cuda", + "reset": True, + "resume": False, + "output_dir": None, + }, + ), + CliCase( + should_succeed=True, + new_way_lines=[ + # --resume instead of --reset + "-c config.yaml -i input.csv -t -m model/ -d cuda --resume", + "-c config.yaml -i input.csv -t -m model/ -d cuda -rm", + ], + old_way_lines=[ + "-c config.yaml -i input.csv -t True -m model/ -d cuda --resume True", + "-c config.yaml -i input.csv -t True -m model/ -d cuda -rm True", + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": True, + "device": "cuda", + "reset": False, + "resume": True, + "output_dir": None, + }, + ), + CliCase( # inference mode + --output-path + should_succeed=True, + new_way_lines=[ + "-c config.yaml -i input.csv --infer -m model/ -d cuda --output-path output/", + "-c config.yaml -i input.csv --infer -m model/ -d cuda -o output/", + ], + old_way_lines=[ + "-c config.yaml -i input.csv -t False -m model/ -d cuda -o output/" + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": False, + "device": "cuda", + "reset": False, + "resume": False, + "output_dir": "output/", + }, + ), + CliCase( # check that `model_dir` can be skipped (used output instead) + should_succeed=True, + new_way_lines=[ + "-c config.yaml -i input.csv --train -d cuda -o output/", + "-c config.yaml -i input.csv --infer -d cuda -o output/", + ], + old_way_lines=[ + "-c config.yaml -i 
input.csv -t True -d cuda -o output/", + "-c config.yaml -i input.csv -t False -d cuda -o output/", + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "output/", + "train_mode": ..., + "device": "cuda", + "reset": False, + "resume": False, + "output_dir": "output/", + }, + ), + CliCase( # check that both output + model cannot be empty simultaneously + should_succeed=False, + new_way_lines=[ + "-c config.yaml -i input.csv --train -d cuda", + "-c config.yaml -i input.csv --infer -d cuda", + ], + old_way_lines=[ + "-c config.yaml -i input.csv -t True -d cuda", + "-c config.yaml -i input.csv -t False -d cuda", + ], + ), + CliCase( # check device + should_succeed=True, + new_way_lines=[ + "-c config.yaml -i input.csv --train -m model/ -d cpu -o output/", + "-c config.yaml -i input.csv --infer -m model/ -d cpu -o output/", + ], + old_way_lines=[ + "-c config.yaml -i input.csv -t True -m model/ -d cpu -o output/", + "-c config.yaml -i input.csv -t False -m model/ -d cpu -o output/", + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": ..., + "device": "cpu", + "reset": False, + "resume": False, + "output_dir": "output/", + }, + ), + CliCase( # reset + resume simultaneously => disabling reset in favor of resume + should_succeed=True, + new_way_lines=[ + "-c config.yaml -i input.csv --train -m model/ -d cpu -o output/ -rt -rm" + ], + old_way_lines=[ + "-c config.yaml -i input.csv -t True -m model/ -d cpu -o output/ -rt True -rm True" + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": True, + "device": "cpu", + "reset": False, + "resume": True, + "output_dir": "output/", + }, + ), + CliCase( # input data may point to folder with 'data.csv' + should_succeed=True, + new_way_lines=["-c config.yaml -i input/ --train -m model/ -d cpu"], + old_way_lines=["-c config.yaml -i input/ -t True -m model/ -d cpu"], + expected_args={ + "data_csv": "input/data.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": True, + "device": "cpu", + "reset": False, + "resume": False, + "output_dir": None, + }, + ), + CliCase( # input data may point to comma-separated list of csvs + should_succeed=True, + new_way_lines=["-c config.yaml -i train.csv,val.csv --train -m model/ -d cpu"], + old_way_lines=["-c config.yaml -i train.csv,val.csv -t True -m model/ -d cpu"], + expected_args={ + "data_csv": "train.csv,val.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": True, + "device": "cpu", + "reset": False, + "resume": False, + "output_dir": None, + }, + ), + CliCase( # output-path may point to non-existent path + should_succeed=True, + new_way_lines=[ + "-c config.yaml -i input.csv --train -m model/ -d cpu -o output_na/" + ], + old_way_lines=[ + "-c config.yaml -i input.csv -t True -m model/ -d cpu -o output_na/" + ], + expected_args={ + "data_csv": "input.csv", + "config_file": "config.yaml", + "model_dir": "model/", + "train_mode": True, + "device": "cpu", + "reset": False, + "resume": False, + "output_dir": "output_na/", + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # config, input-data, train/infer, device are required + " -i input/ --train -m model/ -d cpu", + "-c config.yaml --train -m model/ -d cpu", + "-c config.yaml -i input/ -m model/ -d cpu", + "-c config.yaml -i input/ --train -m model/ ", + # config should point to existing file + "-c config_dir/ -i input/ 
--train -m model/ -d cpu",
+            "-c path_na -i input/ --train -m model/ -d cpu",
+            # output should not point to file
+            "-c config.yaml -i input/ --train -d cpu -o output.csv",
+            # model should not point to file
+            "-c config.yaml -i input/ --train -m model.file -d cpu",
+            # device should not support anything other than cuda/cpu
+            "-c config.yaml -i input/ --train -m model/ -d mps",
+        ],
+        old_way_lines=[
+            # config, input-data, train/infer, device are required
+            " -i input/ -t True -m model/ -d cpu",
+            "-c config.yaml -t True -m model/ -d cpu",
+            "-c config.yaml -i input/ -m model/ -d cpu",
+            "-c config.yaml -i input/ -t True -m model/ ",
+            # config should point to existing file
+            "-c config_dir/ -i input/ -t True -m model/ -d cpu",
+            "-c path_na -i input/ --train -m model/ -d cpu",
+            # output should not point to file
+            # "-c config.yaml -i input/ -t True -d cpu -o output.csv",  # no such check in old way
+            # model should not point to file
+            # "-c config.yaml -i input/ -t True -m model.file -d cpu",  # no such check in old way
+            # device should not support anything other than cuda/cpu
+            # "-c config.yaml -i input/ -t True -m model/ -d mps",  # no such check in old way
+        ],
+    ),
+]
+
+
+@pytest.mark.parametrize("case", test_cases)
+def test_case(cli_runner: CliRunner, case: CliCase):
+    run_test_case(
+        cli_runner=cli_runner,
+        file_system_config=test_file_system,
+        case=case,
+        real_code_function_path=MOCK_PATH,
+        new_way=new_way,
+        old_way=old_way,
+        old_script_name=OLD_SCRIPT_NAME,
+    )
diff --git a/testing/entrypoints/test_split_csv.py b/testing/entrypoints/test_split_csv.py
new file mode 100644
index 000000000..aa516eea6
--- /dev/null
+++ b/testing/entrypoints/test_split_csv.py
@@ -0,0 +1,94 @@
+import os.path
+import pytest
+from click.testing import CliRunner
+
+from GANDLF.entrypoints.split_csv import new_way, old_way
+
+from . import CliCase, run_test_case, TmpDire, TmpFile, TmpNoEx
+
+# This function is a place where a real logic is executed.
+# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.split_csv.split_data_and_save_csvs" +OLD_SCRIPT_NAME = "gandlf_splitCSV" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [ + TmpDire("input/"), + TmpFile("input.csv", content="col1,col2\n123,456\n"), + TmpFile("config.yaml", content="foo: bar"), + TmpFile("config.txt", "@not-a-yaml-content"), + TmpDire("config/"), + TmpDire("output/"), + TmpFile("output.csv", content="col1,col2\n123,456\n"), + TmpNoEx("path_na"), +] +test_cases = [ + CliCase( + should_succeed=True, + new_way_lines=[ + # full command + "--input-csv input.csv --output-dir output/ --config config.yaml", + # tests short arg aliases + "-i input.csv -o output/ -c config.yaml", + ], + old_way_lines=[ + "--inputCSV input.csv --outputDir output/ --config config.yaml", + "-i input.csv -o output/ -c config.yaml", + ], + expected_args={ + "input_data": os.path.normpath("input.csv"), + "output_dir": os.path.normpath("output/"), + "parameters": {"foo": "bar"}, + }, + ), + CliCase( + should_succeed=False, + new_way_lines=[ + # tests that input, output, config are required + " -o output/ -c config.yaml", + "-i input.csv -c config.yaml", + "-i input.csv -o output/ ", + # tests that input points to existing file + "-i input/ -o output/ -c config.yaml", + "-i path_na -o output/ -c config.yaml", + # tests that output points to existing dir + "-i input.csv -o output.csv -c config.yaml", + "-i input.csv -o path_na -c config.yaml", + # tests that config points to existing yaml + "-i input.csv -o output/ -c config.txt", + "-i input.csv -o output/ -c config/", + "-i input.csv -o output/ -c path_na", + ], + old_way_lines=[ + # tests that input, output, config are required + " -o output/ -c config.yaml", + "-i input.csv -c config.yaml", + "-i input.csv -o output/ ", + # tests that input points to existing file + # "-i input/ -o output/ -c config.yaml", # no check in old way + # "-i path_na -o output/ -c config.yaml", # no check in old way + # tests that output points to existing dir + # "-i input.csv -o output.csv -c config.yaml", # no check in old way + # "-i input.csv -o path_na -c config.yaml", # no check in old way + # tests that config points to existing yaml + "-i input.csv -o output/ -c config.txt", + "-i input.csv -o output/ -c config/", + "-i input.csv -o output/ -c path_na", + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/entrypoints/test_verify_install.py b/testing/entrypoints/test_verify_install.py new file mode 100644 index 000000000..af8a6d4ce --- /dev/null +++ b/testing/entrypoints/test_verify_install.py @@ -0,0 +1,34 @@ +import pytest +from click.testing import CliRunner + +from GANDLF.entrypoints.verify_install import new_way, old_way + +from . import CliCase, run_test_case + +# This function is a place where a real logic is executed. 
+# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "GANDLF.entrypoints.verify_install._verify_install" +OLD_SCRIPT_NAME = "gandlf_verifyInstall" + + +# subcommand is trivial, we just check both new_way and old_way run successfully +test_file_system = [] +test_cases = [ + CliCase( + should_succeed=True, new_way_lines=[""], old_way_lines=[""], expected_args={} + ) +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(cli_runner: CliRunner, case: CliCase): + run_test_case( + cli_runner=cli_runner, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=new_way, + old_way=old_way, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/testing/test_deploy.sh b/testing/test_deploy.sh index e6e8f04cf..cfb6ded68 100644 --- a/testing/test_deploy.sh +++ b/testing/test_deploy.sh @@ -42,11 +42,11 @@ cp ../../samples/config_getting_started_segmentation_rad3d.yaml . #### Training #### ################## -gandlf_run \ +gandlf run \ -c ./config_getting_started_segmentation_rad3d.yaml \ -i ./data.csv \ -m ./trained_model_output \ - -t True \ + -t \ -d cpu # remove data.csv to assume that we need a custom script with gandlf deploy @@ -56,25 +56,27 @@ rm data.csv #### deploy #### ################ +echo "Starting model deploy..." # deploy model mkdir model_mlcube cp $MODEL_MLCUBE_TEMPLATE model_mlcube/mlcube.yaml -gandlf_deploy \ +gandlf deploy \ -c ./config_getting_started_segmentation_rad3d.yaml \ -m ./trained_model_output \ --target docker \ --mlcube-root ./model_mlcube \ -o ./built_model_mlcube \ --mlcube-type model \ - -g False \ + --no-gpu \ --entrypoint $MODEL_MLCUBE_ENTRYPOINT +echo "Starting metrics deploy..." # deploy metrics mkdir metrics_mlcube cp $METRICS_MLCUBE_TEMPLATE metrics_mlcube/mlcube.yaml -gandlf_deploy \ +gandlf deploy \ --target docker \ --mlcube-root ./metrics_mlcube \ -o ./built_metrics_mlcube \ @@ -85,25 +87,31 @@ gandlf_deploy \ #### run pipeline #### ###################### +echo "Starting model pipeline run..." + mlcube run \ --mlcube ./built_model_mlcube \ --task infer \ - data_path=../../3d_rad_segmentation \ - output_path=../../predictions + input-data=../../3d_rad_segmentation \ + output-path=../../predictions + +echo "Starting metrics pipeline run..." mlcube run \ --mlcube ./built_metrics_mlcube \ --task evaluate \ predictions=../../predictions \ labels=../../3d_rad_segmentation \ - output_path=../../results.yaml \ - parameters_file=../../config_getting_started_segmentation_rad3d.yaml + output-file=../../results.yaml \ + config=../../config_getting_started_segmentation_rad3d.yaml ############### #### check #### ############### +echo "Checking results..." + if [ -f "results.yaml" ]; then echo "Success" cd .. 
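# ---------------------------------------------------------------------------
# Illustrative sketch (assumed names, not the actual helper): every entrypoint
# test above follows the same pattern -- the function named by MOCK_PATH is
# patched out, the CLI is invoked, and the captured call is compared against
# expected_args. The real implementation is run_test_case() in
# testing/entrypoints/__init__.py, which also creates/removes the temporary
# files declared in test_file_system and drives the legacy argparse-based
# "old way"; the sketch below only covers the Click-based "new way".
from unittest import mock

import click
from click.testing import CliRunner


def check_new_way(
    cli_runner: CliRunner,
    command: click.Command,
    cli_line: str,
    mock_path: str,
    expected_args: dict,
    should_succeed: bool,
) -> None:
    # Patch the real logic so only argument parsing/validation is exercised.
    with mock.patch(mock_path) as mocked_logic:
        result = cli_runner.invoke(command, cli_line.split())
        if should_succeed:
            assert result.exit_code == 0, result.output
            mocked_logic.assert_called_once_with(**expected_args)
        else:
            assert result.exit_code != 0
            mocked_logic.assert_not_called()
# ---------------------------------------------------------------------------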
diff --git a/testing/test_full.py b/testing/test_full.py index 95edec65c..f80bd9257 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3,6 +3,7 @@ import SimpleITK as sitk import numpy as np import pandas as pd +import logging from pydicom.data import get_testdata_file import cv2 @@ -58,6 +59,7 @@ "uinc", "msdnet", "imagenet_unet", + "dynunet", ] # pre-defined regression/classification model types for testing all_models_regression = [ @@ -238,9 +240,7 @@ def write_temp_config_path(parameters_to_write): return temp_config_path -# # these are helper functions to be used in other tests - - +# these are helper functions to be used in other tests def test_train_segmentation_rad_2d(device): print("03: Starting 2D Rad segmentation tests") # read and parse csv @@ -273,6 +273,13 @@ def test_train_segmentation_rad_2d(device): parameters["model"]["converter_type"] = random.choice( ["acs", "soft", "conv3d"] ) + + if model == "dynunet": + # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py + parameters["model"]["kernel_size"] = (3, 3, 3, 1) + parameters["model"]["strides"] = (1, 1, 1, 1) + parameters["model"]["deep_supervision"] = False + parameters["model"]["architecture"] = model parameters["nested_training"]["testing"] = -5 parameters["nested_training"]["validation"] = -5 @@ -364,6 +371,13 @@ def test_train_segmentation_rad_3d(device): parameters["model"]["converter_type"] = random.choice( ["acs", "soft", "conv3d"] ) + + if model == "dynunet": + # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py + parameters["model"]["kernel_size"] = (3, 3, 3, 1) + parameters["model"]["strides"] = (1, 1, 1, 1) + parameters["model"]["deep_supervision"] = False + parameters["model"]["architecture"] = model parameters["nested_training"]["testing"] = -5 parameters["nested_training"]["validation"] = -5 @@ -767,7 +781,9 @@ def test_train_inference_optimize_classification_rad_3d(device): # file_config_temp = write_temp_config_path(parameters_temp) model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization(model_path, config_path) + optimization_result = post_training_model_optimization( + model_path, config_path, outputDir + ) assert optimization_result == True, "Optimization should pass" ## testing inference @@ -1743,10 +1759,14 @@ def test_generic_preprocess_functions(): ## image rescaling test input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) # try out different options + input_tensor_min, input_tensor_max = ( + input_tensor.min().item(), + input_tensor.max().item(), + ) for params in [ {}, None, - {"in_min_max": [5, 250], "out_min_max": [-1, 2]}, + {"in_min_max": [input_tensor_min, input_tensor_max], "out_min_max": [-1, 2]}, {"out_min_max": [0, 1], "percentiles": [5, 95]}, ]: rescaler = global_preprocessing_dict["rescale"](params) @@ -3060,11 +3080,6 @@ def test_generic_cli_function_metrics_cli_rad_nd(): labels_array = training_data["Channel_0"] else: labels_array = training_data["ValueToPredict"] - training_data["target"] = labels_array - training_data["prediction"] = labels_array - if synthesis_detected: - # this optional - training_data["mask"] = training_data["Label"] # read and initialize parameters for specific data dimension parameters = ConfigManager( @@ -3082,17 +3097,46 @@ def test_generic_cli_function_metrics_cli_rad_nd(): 
         if synthesis_detected:
             parameters["problem_type"] = problem_type
 
+        temp_config = write_temp_config_path(parameters)
+
+        # check both single csv input and comma-separated input
+        # # single csv input
+        training_data["target"] = labels_array
+        training_data["prediction"] = labels_array
+        if synthesis_detected:
+            # this is optional
+            training_data["mask"] = training_data["Label"]
+
         temp_infer_csv = os.path.join(outputDir, "temp_csv.csv")
         training_data.to_csv(temp_infer_csv, index=False)
+        # run the metrics calculation
+        output_file = os.path.join(outputDir, "output_single-csv.json")
+        generate_metrics_dict(temp_infer_csv, temp_config, output_file)
 
-        output_file = os.path.join(outputDir, "output.yaml")
+        assert os.path.isfile(
+            output_file
+        ), "Metrics output file was not generated for single-csv input"
 
-        temp_config = write_temp_config_path(parameters)
+        # # comma-separated input
+        temp_infer_csv_gt = os.path.join(outputDir, "temp_csv_gt.csv")
+        temp_infer_csv_pred = os.path.join(outputDir, "temp_csv_pred.csv")
+        # create target_data from training_data using just subjectid and target columns
+        target_data = training_data[["SubjectID", "target"]].copy()
+        target_data.to_csv(temp_infer_csv_gt, index=False)
+
+        # create prediction_data from training_data using just subjectid and prediction columns
+        prediction_data = training_data[["SubjectID", "prediction"]].copy()
+        prediction_data.to_csv(temp_infer_csv_pred, index=False)
 
         # run the metrics calculation
-        generate_metrics_dict(temp_infer_csv, temp_config, output_file)
+        output_file = os.path.join(outputDir, "output_comma-separated-csv.json")
+        generate_metrics_dict(
+            temp_infer_csv_gt + "," + temp_infer_csv_pred, temp_config, output_file
+        )
 
-        assert os.path.isfile(output_file), "Metrics output file was not generated"
+        assert os.path.isfile(
+            output_file
+        ), "Metrics output file was not generated for comma-separated input"
 
     sanitize_outputDir()
 
@@ -3145,3 +3189,51 @@ def test_generic_data_split():
     sanitize_outputDir()
 
     print("passed")
+
+
+def test_generic_logging(capsys):
+    print("52: Starting test for logging")
+    log_file = "testing/gandlf.log"
+    logger_setup(log_file)
+    message = "Testing logging"
+
+    logging.debug(message)
+
+    # tests if the message is in the log file
+    with open(log_file, "r") as file:
+        logs = file.read()
+        assert message in logs
+
+    os.remove(log_file)
+
+    # test the stdout info level; stdout must show only INFO messages
+    message = "Testing stdout logging"
+    logging.info(message)
+    capture = capsys.readouterr()
+    assert message in capture.out
+
+    # Test that stdout does not show other messages
+    message = "Testing stdout logging"
+    logging.debug(message)
+    logging.warning(message)
+    logging.error(message)
+    logging.critical(message)
+    capture = capsys.readouterr()
+    assert message not in capture.out
+
+    # test stderr must NOT show these messages.
+    message = "Testing stderr logging"
+    logging.info(message)
+    logging.debug(message)
+    capture = capsys.readouterr()
+    assert message not in capture.err
+
+    # test stderr must show these messages.
+ logging.error(message) + logging.warning(message) + logging.critical(message) + capture = capsys.readouterr() + assert message in capture.err + + sanitize_outputDir() + print("passed") diff --git a/testing/test_update_version.py b/testing/test_update_version.py new file mode 100644 index 000000000..4619bf053 --- /dev/null +++ b/testing/test_update_version.py @@ -0,0 +1,51 @@ +import pytest +import sys +from pathlib import Path +from .entrypoints import CliCase, run_test_case + +parent = str(Path(__file__).parent.parent.absolute()) +sys.path.append(parent) + +from update_version import main + +# This function is a place where a real logic is executed. +# For tests, we replace it with mock up, and check if this function is called +# with proper args for different cli commands +MOCK_PATH = "update_version._update_version" +OLD_SCRIPT_NAME = "update_version.py" + +# these files would be either created temporarily for test execution, +# or we ensure they do not exist +test_file_system = [] +test_cases = [ + CliCase( + should_succeed=True, + old_way_lines=[ + # long and short versions + "--old-version 0.18 --new-version 0.19", + "-ov 0.18 -nv 0.19", + ], + expected_args={"old_version": "0.18", "new_version": "0.19"}, + ), + CliCase( + should_succeed=False, + old_way_lines=[ + # both args are required + "-ov 0.18", + "-nv 0.19", + ], + ), +] + + +@pytest.mark.parametrize("case", test_cases) +def test_case(case: CliCase): + run_test_case( + cli_runner=None, + file_system_config=test_file_system, + case=case, + real_code_function_path=MOCK_PATH, + new_way=None, + old_way=main, + old_script_name=OLD_SCRIPT_NAME, + ) diff --git a/tutorials/classification_medmnist_notebook/config.yaml b/tutorials/classification_medmnist_notebook/config.yaml index 309860336..20d9ef784 100644 --- a/tutorials/classification_medmnist_notebook/config.yaml +++ b/tutorials/classification_medmnist_notebook/config.yaml @@ -2,10 +2,9 @@ version: { minimum: 0.0.14, - maximum: 0.0.20 # this should NOT be made a variable, but should be tested after every tag is created + maximum: 0.1.0-dev # this should NOT be made a variable, but should be tested after every tag is created } # Choose the model parameters here - model: { dimension: 2, # the dimension of the model and dataset: defines dimensionality of computations diff --git a/tutorials/classification_medmnist_notebook/tutorial.ipynb b/tutorials/classification_medmnist_notebook/tutorial.ipynb index c0de72f0d..4609bf24d 100644 --- a/tutorials/classification_medmnist_notebook/tutorial.ipynb +++ b/tutorials/classification_medmnist_notebook/tutorial.ipynb @@ -395,7 +395,7 @@ } ], "source": [ - "!python ../gandlf_run -c ./config.yaml -i medmnist/dataset/train_path_full.csv,medmnist/dataset/val_path_full.csv -m model/ -t True -d cuda" + "!gandlf run -c ./config.yaml -i medmnist/dataset/train_path_full.csv,medmnist/dataset/val_path_full.csv -m model/ --train -d cuda" ] }, { @@ -563,7 +563,7 @@ } ], "source": [ - "!python ../gandlf_run -c config.yaml -i ./medmnist/dataset/test_path_full.csv -m ./model/ -t False -d cuda" + "!gandlf run -c config.yaml -i ./medmnist/dataset/test_path_full.csv -m ./model/ --infer -d cuda" ] }, { diff --git a/tutorials/classification_pathmnist_notebook/classification_tutorial.ipynb b/tutorials/classification_pathmnist_notebook/classification_tutorial.ipynb index 187eed3a8..78e31f046 100644 --- a/tutorials/classification_pathmnist_notebook/classification_tutorial.ipynb +++ b/tutorials/classification_pathmnist_notebook/classification_tutorial.ipynb @@ -205,7 
+205,7 @@ "source": [ "-e is short for editable, and . indicates that we are installing from the current directory (GaNDLF). Essentially, we are installing the packages in an editable mode because this allows us to change the source code in the packages without having to redownload the packages after we make modifications. \n", "\n", - "Now, let's use gandlf_verifyInstall to verify our GaNDLF installation. " + "Now, let's use gandlf `verify-install` command to verify our GaNDLF installation. " ] }, { @@ -215,7 +215,7 @@ "metadata": {}, "outputs": [], "source": [ - "!python ./gandlf_verifyInstall\n" + "!gandlf verify-install\n" ] }, { @@ -715,7 +715,7 @@ "metadata": {}, "outputs": [], "source": [ - "!python /content/GaNDLF/gandlf_run -c /content/config.yaml -i /content/medmnist/dataset/train_path_full.csv,/content/medmnist/dataset/val_path_full.csv -m /content/model/ -t True -d cuda\n" + "!gandlf run -c /content/config.yaml -i /content/medmnist/dataset/train_path_full.csv,/content/medmnist/dataset/val_path_full.csv -m /content/model/ --train -d cuda" ] }, { @@ -783,7 +783,7 @@ "metadata": {}, "outputs": [], "source": [ - "!python /content/GaNDLF/gandlf_run -c /content/config.yaml -i /content/medmnist/dataset/test_path_full.csv -m /content/model/ -t False -d cuda" + "!gandlf run -c /content/config.yaml -i /content/medmnist/dataset/test_path_full.csv -m /content/model/ --infer -d cuda" ] }, { diff --git a/gandlf_updateVersion b/update_version.py similarity index 56% rename from gandlf_updateVersion rename to update_version.py index 5d32f0361..c07093761 100644 --- a/gandlf_updateVersion +++ b/update_version.py @@ -1,61 +1,33 @@ #!usr/bin/env python # -*- coding: utf-8 -*- -import argparse, os, fileinput +import argparse +import os +import fileinput -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog="GANDLF_UpdateVersion", - formatter_class=argparse.RawTextHelpFormatter, - description="Update versions when creating a new release of GaNDLF, also useful when updating the version for development.\n\n", - ) - parser.add_argument( - "-ov", - "--old_version", - metavar="", - type=str, - required=True, - help="The old version number", - ) - parser.add_argument( - "-nv", - "--new_version", - metavar="", - type=str, - required=True, - help="The new version number", - ) +def in_place_string_replace(filename: str, old_string: str, new_string: str) -> None: + """ + Replace a string in a file in place. - args = parser.parse_args() + Args: + filename (str): The file to replace the string in + old_string (str): The string to replace + new_string (str): The string to replace with + """ + if os.path.exists(filename): + with fileinput.FileInput(filename, inplace=True) as file: + for line in file: + print(line.replace(old_string, new_string), end="") - def in_place_string_replace( - filename: str, old_string: str, new_string: str - ) -> None: - """ - Replace a string in a file in place. 
- - Args: - filename (str): The file to replace the string in - old_string (str): The string to replace - new_string (str): The string to replace with - """ - if os.path.exists(filename): - with fileinput.FileInput(filename, inplace=True) as file: - for line in file: - print(line.replace(old_string, new_string), end="") +def _update_version(old_version: str, new_version: str): cwd = os.getcwd() in_place_string_replace( - os.path.join(cwd, "GANDLF/version.py"), - args.old_version, - args.new_version, + os.path.join(cwd, "GANDLF/version.py"), old_version, new_version ) # find all yaml files in samples and testing directories - folders_to_iterate = [ - os.path.join(cwd, "samples"), - os.path.join(cwd, "testing"), - ] + folders_to_iterate = [os.path.join(cwd, "samples"), os.path.join(cwd, "testing")] files_where_version_is_stored = [ os.path.join(cwd, "mlcube/model_mlcube/workspace/config.yml"), @@ -69,11 +41,43 @@ def in_place_string_replace( if file.endswith(".yaml") or file.endswith(".yml"): files_where_version_is_stored.append(os.path.join(folder, file)) - args.old_version = args.old_version.replace("-dev", "") - args.new_version = args.new_version.replace("-dev", "") + old_version = old_version.replace("-dev", "") + new_version = new_version.replace("-dev", "") # update the version.py file for filename in files_where_version_is_stored: - in_place_string_replace(filename, args.old_version, args.new_version) + in_place_string_replace(filename, old_version, new_version) print("Version updated successfully in `version.py` and all configuration files!") + + +def main(): + parser = argparse.ArgumentParser( + prog="Update GaNDLF version", + formatter_class=argparse.RawTextHelpFormatter, + description="Update versions when creating a new release of GaNDLF, also useful when updating the version for development.\n\n", + ) + parser.add_argument( + "-ov", + "--old-version", + metavar="", + type=str, + required=True, + help="The old version number", + ) + parser.add_argument( + "-nv", + "--new-version", + metavar="", + type=str, + required=True, + help="The new version number", + ) + + args = parser.parse_args() + + _update_version(args.old_version, args.new_version) + + +if __name__ == "__main__": + main()
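# ---------------------------------------------------------------------------
# Illustrative usage sketch: because the version-bump logic now lives in
# _update_version() instead of under `if __name__ == "__main__":`, it can be
# reused programmatically as well as via `python update_version.py -ov ... -nv ...`.
# The version strings below are placeholders, and the call assumes it is made
# from the repository root, since _update_version() builds paths from os.getcwd().
from update_version import _update_version

# Rewrites GANDLF/version.py and the tracked configuration files (samples/,
# testing/, MLCube workspace configs), stripping the "-dev" suffix for the
# configuration files only.
_update_version(old_version="0.1.0-dev", new_version="0.1.1-dev")
# ---------------------------------------------------------------------------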