diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 06ce1b0991..21b575640a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,8 @@ # Unless a later match takes precedence,they will be requested for review when someone opens a pull request. * @mlcommons/wg-ck -/CODEOWNERS @mlcommons/staff +/.github/CODEOWNERS @mlcommons/systems + +/.github/workflows/cla.yml @mlcommons/systems + +/LICENSE.md @mlcommons/systems diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml new file mode 100644 index 0000000000..496aeda757 --- /dev/null +++ b/.github/workflows/build_wheel.yml @@ -0,0 +1,97 @@ +name: Build wheel and release into PYPI + +on: + release: + types: [published] + push: + branches: + - main + - mlperf-inference + paths: + - VERSION + - setup.py + + +jobs: + build_wheels: + if: github.repository_owner == 'mlcommons' + name: Build wheel + runs-on: ubuntu-latest + environment: release + + permissions: + id-token: write + contents: write + + strategy: + fail-fast: false + steps: + # Step 1: Checkout the code + - uses: actions/checkout@v4 + with: + fetch-depth: 2 + ssh-key: ${{ secrets.DEPLOY_KEY }} + + # Step 2: Set up Python + - uses: actions/setup-python@v3 + + # Step 3: Check if VERSION file has changed in this push + - name: Check if VERSION file has changed + id: version_changed + run: | + if git diff --name-only HEAD~1 | grep -q "VERSION"; then + echo "VERSION file has been modified" + echo "::set-output name=version_changed::true" + new_version=$(cat VERSION) + else + echo "VERSION file has NOT been modified" + echo "::set-output name=version_changed::false" + fi + echo "::set-output name=new_version::$new_version" + + # Step 4: Increment version if VERSION was not changed + - name: Increment version if necessary + id: do_version_increment + if: steps.version_changed.outputs.version_changed == 'false' + run: | + # Check if VERSION file exists, else initialize it + if [ ! -f VERSION ]; then + echo "0.0.0" > VERSION + fi + + version=$(cat VERSION) + IFS='.' 
read -r major minor patch <<< "$version" + patch=$((patch + 1)) + new_version="$major.$minor.$patch" + echo $new_version > VERSION + echo "New version: $new_version" + echo "::set-output name=new_version::$new_version" + + # Step 5: Commit the updated version to the repository + - name: Commit updated version + if: steps.version_changed.outputs.version_changed == 'false' + run: | + git config --global user.name "${{ github.actor }}" + git config --global user.email "${{ github.actor }}@users.noreply.github.com" + git add VERSION + git commit -m "Increment version to ${{ steps.do_version_increment.outputs.new_version }}" + git push + + # Step 6: Install required dependencies + - name: Install requirements + run: python3 -m pip install setuptools wheel build + + # Step 7: Build the Python wheel + - name: Build wheels + working-directory: ./ + run: python3 -m build && rm dist/*.whl + + # Step 8: Publish to PyPI + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verify-metadata: true + skip-existing: true + packages-dir: dist + repository-url: https://upload.pypi.org/legacy/ + verbose: true diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish-docs.yaml similarity index 89% rename from .github/workflows/publish.yaml rename to .github/workflows/publish-docs.yaml index fa9ace5da0..e720258ecd 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish-docs.yaml @@ -1,6 +1,6 @@ # This is a basic workflow to help you get started with Actions -name: Publish site +name: Publish docs site on: @@ -11,6 +11,9 @@ on: - main - docs - mlperf-inference + paths: + - docs/** + - mkdocs.yml jobs: diff --git a/.github/workflows/run-individual-script-tests.yml b/.github/workflows/run-individual-script-tests.yml new file mode 100644 index 0000000000..e6c3db4b5a --- /dev/null +++ b/.github/workflows/run-individual-script-tests.yml @@ -0,0 +1,37 @@ +# This workflow will run configured tests for any updated CM scripts +name: Individual CM script Tests + +on: + pull_request: + branches: [ "main", "mlperf-inference", "dev" ] + paths: + - 'script/**_cm.json' + - 'script/**_cm.yml' + +jobs: + run-script-tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + test-input-index: [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11+" ] + steps: + - name: 'Checkout' + uses: actions/checkout@v4 + with: + fetch-depth: 2 + - name: Get changed files + id: getfile + run: | + git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }} + git fetch upstream + echo "files=$(git diff upstream/${{ github.event.pull_request.base.ref }} --name-only | xargs)" >> $GITHUB_OUTPUT + - name: RUN Script Tests + run: | + echo ${{ steps.getfile.outputs.files }} + for file in ${{ steps.getfile.outputs.files }}; do + echo $file + done + python3 -m pip install cmind + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + DOCKER_CM_REPO=${{ github.event.pull_request.head.repo.html_url }} DOCKER_CM_REPO_BRANCH=${{ github.event.pull_request.head.ref }} TEST_INPUT_INDEX=${{ matrix.test-input-index }} python3 tests/script/process_tests.py ${{ steps.getfile.outputs.files }} diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml new file mode 100644 index 0000000000..f753fab8d1 --- /dev/null +++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml @@ -0,0 +1,26 @@ +name: MLPerf 
Inference AMD implementations + +on: + schedule: + - cron: "29 4 * * *" #to be adjusted + +jobs: + build_nvidia: + if: github.repository_owner == 'gateoverflow' + runs-on: [ self-hosted, linux, x64, GO-spr ] + strategy: + fail-fast: false + matrix: + python-version: [ "3.12" ] + model: [ "llama2-70b-99.9" ] + steps: + - name: Test MLPerf Inference AMD (build only) ${{ matrix.model }} + run: | + if [ -f "gh_action_conda/bin/deactivate" ]; then source gh_action_conda/bin/deactivate; fi + python3 -m venv gh_action_conda + source gh_action_conda/bin/activate + export CM_REPOS=$HOME/GH_CM + pip install --upgrade cm4mlops + pip install tabulate + cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml new file mode 100644 index 0000000000..91420ecc03 --- /dev/null +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -0,0 +1,55 @@ +# This workflow will test the submission generation capability of CM f + +name: CM based Submission Generation + +on: + pull_request: + branches: [ "main", "dev", "mlperf-inference" ] + paths: + - '.github/workflows/test-cm-based-submission-generation.yml' + - '**' + - '!**.md' +jobs: + submission_generation: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: [ "3.12" ] + division: ["closed", "open"] + category: ["datacenter", "edge"] + case: ["case-3", "case-7"] + action: ["run", "docker"] + exclude: + - os: macos-latest + - os: windows-latest + - division: "open" + - category: "edge" + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install cmind + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + - name: Pull repo where test cases are uploaded + run: | + git clone -b submission-generation-tests https://github.com/anandhu-eng/inference.git submission_generation_tests + - name: Run Submission Generation - ${{ matrix.case }} ${{ matrix.action }} ${{ matrix.category }} ${{ matrix.division }} + run: | + if [ "${{ matrix.case }}" == "case-3" ]; then + #results_dir="submission_generation_tests/case-3/" + description="Submission generation (model_mapping.json not present but model name matches with official one)" + elif [ "${{ matrix.case }}" == "case-7" ]; then + #results_dir="submission_generation_tests/case-7/" + description="Submission generation (sut_info.json incomplete, SUT folder name in required format)" + fi + # 
Dynamically set the log group to simulate a dynamic step name + echo "::group::$description" + cm ${{ matrix.action }} script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --category=${{ matrix.category }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet + echo "::endgroup::" + diff --git a/.github/workflows/test-intel-mlperf-inference-implementations.yml b/.github/workflows/test-intel-mlperf-inference-implementations.yml new file mode 100644 index 0000000000..bbb17c166b --- /dev/null +++ b/.github/workflows/test-intel-mlperf-inference-implementations.yml @@ -0,0 +1,26 @@ +name: MLPerf Inference Intel implementations + +on: + schedule: + - cron: "29 1 * * *" #to be adjusted + +jobs: + build_nvidia: + if: github.repository_owner == 'gateoverflow' + runs-on: [ self-hosted, linux, x64, GO-spr ] + strategy: + fail-fast: false + matrix: + python-version: [ "3.12" ] + model: [ "resnet50", "bert-99" ] + steps: + - name: Test MLPerf Inference Intel ${{ matrix.model }} + run: | + if [ -f "gh_action_conda/bin/deactivate" ]; then source gh_action_conda/bin/deactivate; fi + python3 -m venv gh_action_conda + source gh_action_conda/bin/activate + export CM_REPOS=$HOME/GH_CM + pip install --upgrade cm4mlops + pip install tabulate + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cpu --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-mlperf-inference-abtf-poc.yml b/.github/workflows/test-mlperf-inference-abtf-poc.yml index bb8403ed9f..de62093015 100644 --- a/.github/workflows/test-mlperf-inference-abtf-poc.yml +++ b/.github/workflows/test-mlperf-inference-abtf-poc.yml @@ -16,13 +16,30 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, ubuntu-20.04, ubuntu-24.04] + os: [ubuntu-22.04, ubuntu-20.04, ubuntu-24.04, macos-latest, macos-13, windows-latest] python-version: [ "3.8", "3.12" ] backend: [ "pytorch" ] implementation: [ "python" ] + docker: [ "", " --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes" ] + extra-args: [ "--adr.compiler.tags=gcc", "--env.CM_MLPERF_LOADGEN_BUILD_FROM_SRC=off" ] exclude: - os: ubuntu-24.04 python-version: "3.8" + - os: windows-latest + python-version: "3.8" + - os: windows-latest + extra-args: "--adr.compiler.tags=gcc" + - os: windows-latest + docker: " --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes" + # windows docker image is not supported in CM yet + - os: macos-latest + python-version: "3.8" + - os: macos-13 + python-version: "3.8" + - os: macos-latest + docker: " --docker --docker_it=no 
--docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes" + - os: macos-13 + docker: " --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes" steps: - uses: actions/checkout@v3 @@ -35,58 +52,60 @@ jobs: pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm pull repo mlcommons@cm4abtf --branch=poc - - name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on docker + + - name: Install Docker on macos + if: runner.os == 'macOS-deactivated' run: | - cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v + brew update + brew install --cask docker - build2: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [macos-latest, macos-13] - python-version: [ "3.12" ] - backend: [ "pytorch" ] - implementation: [ "python" ] - - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Start Docker Daemon on macos + if: runner.os == 'macOS-deactivated' run: | - python3 -m pip install cmind - cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - cm pull repo mlcommons@cm4abtf --branch=poc - - name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on ${{ matrix.os }} + open /Applications/Docker.app + echo "Starting Docker, this may take a while..." + + # Set max attempts and initial wait time + MAX_ATTEMPTS=20 + WAIT_TIME=5 + + # Loop until Docker daemon is up or max attempts reached + attempt=1 + while ! docker info > /dev/null 2>&1; do + echo "Attempt $attempt: Waiting for Docker to start..." + sleep $WAIT_TIME + attempt=$((attempt + 1)) + WAIT_TIME=$((WAIT_TIME * 2)) # Exponential backoff + + if [ $attempt -gt $MAX_ATTEMPTS ]; then + echo "Docker failed to start within the timeout period" + exit 1 + fi + done + + echo "Docker is up and running" + + - name: Install Docker Desktop on Windows + if: runner.os == 'Windows-deactivated' run: | - cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v - - build3: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [windows-latest] - python-version: [ "3.12" ] - backend: [ "pytorch" ] - implementation: [ "python" ] - exclude: - - python-version: "3.8" + choco install docker-desktop --no-progress -y - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Start Docker Desktop on Windows + if: runner.os == 'Windows-deactivated' run: | - python3 -m pip install cmind - cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - cm pull repo mlcommons@cm4abtf --branch=poc + Start-Process 'C:\Program Files\Docker\Docker\Docker Desktop.exe' + # Wait until Docker daemon is running + $retryCount = 0 + while (!(docker info) -and ($retryCount -lt 10)) { + Write-Output "Waiting for Docker to start..." 
+ Start-Sleep -Seconds 10 + $retryCount++ + } + if ($retryCount -ge 10) { + throw "Docker failed to start" + } + Write-Output "Docker is up and running" + - name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --quiet --env.CM_MLPERF_LOADGEN_BUILD_FROM_SRC=off --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 -v + cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet ${{ matrix.extra-args }} ${{ matrix.docker }} -v diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index a6e6b86ef7..7ac57d8e70 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -4,7 +4,7 @@ name: MLPerf inference bert (deepsparse, tf, onnxruntime, pytorch) on: - pull_request: + pull_request_target: branches: [ "main", "dev", "mlperf-inference" ] paths: - '.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml' @@ -13,8 +13,7 @@ on: jobs: build: - - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: @@ -22,11 +21,13 @@ jobs: python-version: [ "3.11" ] backend: [ "deepsparse", "tf", "onnxruntime", "pytorch" ] precision: [ "int8", "fp32" ] + os: [ubuntu-latest, windows-latest, macos-latest] exclude: - backend: tf - backend: pytorch - backend: onnxruntime - precision: fp32 + - os: windows-latest steps: - uses: actions/checkout@v3 @@ -38,6 +39,25 @@ jobs: run: | python3 -m pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - - name: Test MLPerf Inference Bert (DeepSparse, TF, ONNX, PyTorch) + - name: Test MLPerf Inference Bert ${{ matrix.backend }} on ${{ matrix.os }} + if: matrix.os == 'windows-latest' + run: | + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --precision=${{ matrix.precision }} --target_qps=1 -v --quiet + - name: Test MLPerf Inference Bert ${{ matrix.backend }} on ${{ matrix.os }} + if: matrix.os != 'windows-latest' + run: | + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=1 -v --quiet + - name: Push Results + if: github.repository_owner == 'gateoverflow' + env: + USER: "GitHub Action" + EMAIL: "admin@gateoverflow.com" + GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="cTuning" --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=1 -v --quiet + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" + git config --global credential.https://github.com.helper "" + 
git config --global credential.https://github.com.helper "!gh auth git-credential" + git config --global credential.https://gist.github.com.helper "" + git config --global credential.https://gist.github.com.helper "!gh auth git-credential" + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml new file mode 100644 index 0000000000..6440d04483 --- /dev/null +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -0,0 +1,48 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: MLPerf inference DLRM-v2 + +on: + schedule: + - cron: "30 1 * * *" + +jobs: + build_reference: + if: github.repository_owner == 'gateoverflow' + runs-on: [ self-hosted, GO-spr, linux, x64 ] + strategy: + fail-fast: false + matrix: + python-version: [ "3.12" ] + device: [ "cpu" ] + + steps: + - name: Test MLPerf Inference DLRM-v2 reference implementation + run: | + source gh_action/bin/deactivate || python3 -m venv gh_action + source gh_action/bin/activate + export CM_REPOS=$HOME/GH_CM + python3 -m pip install cm4mlops + cm pull repo + cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + + build_intel: + if: github.repository_owner == 'gateoverflow_off' + runs-on: [ self-hosted, GO-spr, linux, x64 ] + strategy: + fail-fast: false + matrix: + python-version: [ "3.12" ] + backend: [ "pytorch" ] + device: [ "cpu" ] + + steps: + - name: Test MLPerf Inference DLRM-v2 INTEL implementation + run: | + source gh_action/bin/deactivate || python3 -m venv gh_action + source gh_action/bin/activate + export CM_REPOS=$HOME/GH_CM + python3 -m pip install cm4mlops + cm pull repo + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 1c59dea469..0562b9176b 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -5,7 +5,7 @@ name: MLPerf inference GPT-J on: schedule: - - cron: "1 2 * * *" + - cron: "15 19 * * *" jobs: build: @@ -19,15 +19,13 @@ jobs: precision: [ "float16" ] steps: - - name: Install dependencies + - name: Test MLPerf 
Inference GPTJ run: | source gh_action/bin/deactivate || python3 -m venv gh_action source gh_action/bin/activate export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - - name: Test MLPerf Inference GPTJ - run: | - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index 97bd1bc6fc..b6fb155c48 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -5,12 +5,12 @@ name: MLPerf inference LLAMA 2 70B on: schedule: - - cron: "30 19 * * 4" + - cron: "30 2 * * 4" jobs: build_reference: if: github.repository_owner == 'gateoverflow' - runs-on: [ self-hosted, GO-i9, linux, x64 ] + runs-on: [ self-hosted, GO-spr, linux, x64 ] strategy: fail-fast: false matrix: @@ -24,9 +24,10 @@ jobs: source gh_action/bin/deactivate || python3 -m venv gh_action source gh_action/bin/activate export CM_REPOS=$HOME/GH_CM - python3 -m pip install cm4mlops + pip install cm4mlops + pip install tabulate cm pull repo - python3 -m pip install "huggingface_hub[cli]" + pip install "huggingface_hub[cli]" huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential - name: Test MLPerf Inference LLAMA 2 70B reference implementation run: | diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml new file mode 100644 index 0000000000..1ad3f15134 --- /dev/null +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -0,0 +1,32 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: MLPerf inference MIXTRAL-8x7B + +on: + schedule: + - cron: "45 10 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST + +jobs: + build_reference: + if: github.repository_owner == 'gateoverflow' + runs-on: [ self-hosted, GO-spr, linux, x64 ] + strategy: + fail-fast: false + matrix: + python-version: [ "3.12" ] + backend: [ "pytorch" ] + device: [ "cpu" ] + + steps: + - name: Test MLPerf Inference MIXTRAL-8X7B reference implementation + run: | + source 
gh_action/bin/deactivate || python3 -m venv gh_action + source gh_action/bin/activate + export CM_REPOS=$HOME/GH_CM + pip install cm4mlops + pip install "huggingface_hub[cli]" + huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential + cm pull repo + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=1 --target_qps=1 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-i9" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index f726b8ecc5..4b71896296 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -4,7 +4,7 @@ name: MLPerf inference MLCommons C++ ResNet50 on: - pull_request: + pull_request_target: branches: [ "main", "dev", "mlperf-inference" ] paths: - '.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml' @@ -13,16 +13,18 @@ on: jobs: build: - - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: [ "3.12" ] llvm-version: [ "15.0.6", "16.0.4", "17.0.6" ] + os: [ubuntu-latest, windows-latest, macos-latest] exclude: - - llvm-version: "15.0.6" - - llvm-version: "16.0.4" + - llvm-version: "15.0.6" + - llvm-version: "16.0.4" + - os: windows-latest + - os: macos-latest steps: - uses: actions/checkout@v3 @@ -36,6 +38,25 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm cm run script --quiet --tags=install,prebuilt,llvm --version=${{ matrix.llvm-version }} - - name: Test MLPerf Inference MLCommons C++ ResNet50 + - name: Test MLPerf Inference MLCommons C++ ResNet50 on ${{ matrix.os }} + if: matrix.os == 'windows-latest' + run: | + cmr "app mlperf inference mlcommons cpp" --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} --adr.loadgen.tags=_from-pip --pip_loadgen=yes -v --quiet + - name: Test MLPerf Inference MLCommons C++ ResNet50 on ${{ matrix.os }} + if: matrix.os != 'windows-latest' + run: | + cmr "app mlperf inference mlcommons cpp" --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} -v --quiet + - name: Push Results + if: github.repository_owner == 'gateoverflow' + env: + USER: "GitHub Action" + EMAIL: "admin@gateoverflow.com" + GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} run: | - cmr "app mlperf inference mlcommons cpp" -v --quiet + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" + git config --global credential.https://github.com.helper "" + git config --global credential.https://github.com.helper "!gh auth 
git-credential" + git config --global credential.https://gist.github.com.helper "" + git config --global credential.https://gist.github.com.helper "!gh auth git-credential" + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 5867abbf68..5e1f00c478 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -42,7 +42,8 @@ jobs: - name: Test MLPerf Inference ResNet50 (Windows) if: matrix.os == 'windows-latest' run: | - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_windows --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + git config --system core.longpaths true + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet - name: Test MLPerf Inference ResNet50 (Linux/macOS) if: matrix.os != 'windows-latest' run: | @@ -52,13 +53,12 @@ jobs: env: USER: "GitHub Action" EMAIL: "admin@gateoverflow.com" + GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} run: | - git config --global user.name "$USER" - git config --global user.email "$EMAIL" + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" git config --global credential.https://github.com.helper "" git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - - cm run script --tags=auth,gh,cli --with_token="${{ secrets.TEST_RESULTS_GITHUB_TOKEN }}" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R50 GH action" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index 61f9d30cd2..5077ad19e7 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -4,7 +4,7 @@ name: MLPerf inference retinanet on: - pull_request: + pull_request_target: branches: [ "main", "dev", "mlperf-inference" ] paths: - '.github/workflows/test-mlperf-inference-retinanet.yml' @@ -13,17 +13,21 @@ on: jobs: build: - - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: [ubuntu-latest, windows-latest, macos-latest] python-version: [ "3.12" ] backend: [ "onnxruntime", "pytorch" ] implementation: [ 
"python", "cpp" ] exclude: - backend: pytorch implementation: cpp + - os: windows-latest + implementation: cpp + - os: macos-latest + implementation: cpp steps: - uses: actions/checkout@v3 @@ -35,6 +39,26 @@ jobs: run: | python3 -m pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} + - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} --model=retinanet --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 + - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} + if: matrix.os != 'windows-latest' + run: | + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + - name: Push Results + if: github.repository_owner == 'gateoverflow' + env: + USER: "GitHub Action" + EMAIL: "admin@gateoverflow.com" + GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="cTuning" --hw_name=default --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" + git config --global credential.https://github.com.helper "" + git config --global credential.https://github.com.helper "!gh auth git-credential" + git config --global credential.https://gist.github.com.helper "" + git config --global credential.https://gist.github.com.helper "!gh auth git-credential" + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index fd452c3ba6..5527be3fac 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -1,12 +1,11 @@ name: MLPerf inference SDXL -#off now as we have SCC24 test doing the same on: schedule: - - cron: "1 2 * * *" + - cron: "30 2 * * *" jobs: build_reference: - if: github.repository_owner == 'gateoverflow_off' + if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64 ] strategy: fail-fast: false @@ -22,23 +21,5 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} 
--target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean - - build_nvidia: - if: github.repository_owner == 'gateoverflow_off' - runs-on: [ self-hosted, linux, x64 ] - strategy: - fail-fast: false - matrix: - python-version: [ "3.12" ] - backend: [ "tensorrt" ] - precision: [ "float16" ] - implementation: [ "nvidia" ] - steps: - - name: Test MLPerf Inference SDXL Nvidia - run: | - source gh_action/bin/deactivate || python3 -m venv gh_action - source gh_action/bin/activate - export CM_REPOS=$HOME/GH_CM - cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml index ed73c7ad03..597121fb0d 100644 --- a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml +++ b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml @@ -5,7 +5,7 @@ name: MLPerf loadgen with HuggingFace bert onnx fp32 squad model on: pull_request: - branches: [ "main", "dev" ] + branches: [ "main", "dev", "mlperf-inference" ] paths: - '.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml' - '**' @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.12" ] + python-version: [ "3.10", "3.12" ] steps: - uses: actions/checkout@v3 @@ -30,7 +30,6 @@ jobs: run: | python3 -m pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - cm run script --quiet --tags=get,sys-utils-cm - name: Test MLPerf loadgen with HuggingFace bert onnx fp32 squad model run: | - cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx --quiet + cm run script --tags=python,app,loadgen-generic,_onnxruntime,_custom,_huggingface,_model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1 --quiet diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml 
b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml new file mode 100644 index 0000000000..5373a825b4 --- /dev/null +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -0,0 +1,26 @@ +name: MLPerf Inference Nvidia implementations + +on: + schedule: + - cron: "49 19 * * *" #to be adjusted + +jobs: + build_nvidia: + if: github.repository_owner == 'gateoverflow' + runs-on: [ self-hosted, linux, x64, GO-spr ] + strategy: + fail-fast: false + matrix: + python-version: [ "3.12" ] + model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9" ] + steps: + - name: Test MLPerf Inference NVIDIA ${{ matrix.model }} + run: | + if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi + python3 -m venv gh_action + source gh_action/bin/activate + export CM_REPOS=$HOME/GH_CM + pip install --upgrade cm4mlops + pip install tabulate + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=RTX4090x2 --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_RTX4090x2" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=RTX4090x2 diff --git a/.github/workflows/test-qaic-software-kit.yml b/.github/workflows/test-qaic-software-kit.yml index e3a186daae..4b877da008 100644 --- a/.github/workflows/test-qaic-software-kit.yml +++ b/.github/workflows/test-qaic-software-kit.yml @@ -38,4 +38,3 @@ jobs: - name: Test Software Kit for compilation on Ubuntu 20.04 run: | cm run script --tags=get,qaic,software,kit --adr.compiler.tags=${{ matrix.compiler }} --adr.compiler.version=${{ matrix.llvm-version }} --quiet - cm run script --tags=get,qaic,software,kit --adr.compiler.tags=${{ matrix.compiler }} --adr.compiler.version=${{ matrix.llvm-version }} --quiet diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 680d0f5f43..802593fe25 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "1 3 * * *" + - cron: "35 19 * * *" jobs: build_reference: @@ -27,9 +27,9 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes 
--model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results + cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions build_nvidia: @@ -52,7 +52,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} 
--docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000..a17c59a62a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include README.md +include VERSION diff --git a/README.md b/README.md index deff5458ce..843040705d 100644 --- a/README.md +++ b/README.md @@ -13,126 +13,12 @@ [![Test QAIC Software kit Compilation](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml) -This repository contains reusable and cross-platform automation recipes to run DevOps, MLOps, and MLPerf -via a simple and human-readable [Collective Mind interface (CM)](https://github.com/mlcommons/ck) -while adapting to different operating systems, software and hardware. +Please see the [docs](https://docs.mlcommons.org/cm4mlops/) site for understanding CM scripts better. The `mlperf-branch` of the **cm4mlops** repository contains updated CM scripts specifically for MLPerf Inference. For more information on using CM for MLPerf Inference, visit the [MLPerf Inference Documentation site](https://docs.mlcommons.org/inference/). -All СM scripts have a simple Python API, extensible JSON/YAML meta description -and unified input/output to make them reusable in different projects either individually -or by chaining them together into portable automation workflows, applications -and web services adaptable to continuously changing models, data sets, software and hardware. - -We develop and test [CM scripts](script) as a community effort to support the following projects: -* [CM for MLPerf](https://docs.mlcommons.org/inference): modularize and automate MLPerf benchmarks - * [Modular C++ harness for MLPerf loadgen](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp) - * [Modular Python harness for MLPerf loadgen](https://github.com/mlcommons/cm4mlops/tree/main/script/app-loadgen-generic-python) -* [CM for research and education](https://cTuning.org/ae): provide a common interface to automate and reproduce results from research papers - and MLPerf benchmarks; -* [CM for ABTF](https://github.com/mlcommons/cm4abtf): provide a unified CM interface to run automotive benchmarks; -* [CM for optimization](https://access.cknowledge.org/playground/?action=challenges): co-design efficient and cost-effective - software and hardware for AI, ML and other emerging workloads via open challenges. - -You can read this [ArXiv paper](https://arxiv.org/abs/2406.16791) to learn more about the CM motivation and long-term vision. 
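The removed README text above states that all CM scripts expose a simple Python API in addition to the `cm` / `cmr` command line. A minimal sketch of that usage, assuming only that the `cmind` package is installed from PyPI and reusing the tags from the image-classification demo below; the exact input keys are illustrative rather than authoritative:

```python
# Minimal sketch: run a CM script through the Python API instead of the CLI.
# Assumes `pip install cmind` and `cm pull repo mlcommons@cm4mlops` were done first.
# The tags mirror the CLI demo; treat the input keys as illustrative.
import cmind

r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'python,app,image-classification,onnx,_cpu',
    'input': 'computer_mouse.jpg',
    'quiet': True,
})
if r['return'] > 0:
    raise RuntimeError(r.get('error', 'CM script failed'))
```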
- -Please provide your feedback or submit your issues [here](https://github.com/mlcommons/cm4mlops/issues). - - -## Catalog - -Online catalog: [cKnowledge](https://access.cknowledge.org/playground/?action=scripts), [MLCommons](https://docs.mlcommons.org/cm4mlops/scripts). - -## Citation - -Please use this [BibTeX file](https://github.com/mlcommons/ck/blob/master/citation.bib) to cite this project. - -## A few demos - -### Install CM and virtual env - -Install the [MLCommons CM automation language](https://access.cknowledge.org/playground/?action=install). - -### Pull this repository - -```bash -cm pull repo mlcommons@cm4mlops --branch=dev -``` - -### Run image classification using CM - -```bash - -cm run script "python app image-classification onnx _cpu" --help - -cm run script "download file _wget" --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e -cm run script "python app image-classification onnx _cpu" --input=computer_mouse.jpg - -cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg -cmr --tags=python,app,image-classification,onnx,_cpu --input=computer_mouse.jpg -cmr 3d5e908e472b417e --input=computer_mouse.jpg - -cm docker script "python app image-classification onnx _cpu" --input=computer_mouse.jpg - -cm gui script "python app image-classification onnx _cpu" -``` - -### Re-run experiments from the ACM/IEEE MICRO'23 paper - -Check this [script/reproduce-ieee-acm-micro2023-paper-96](README.md). - -### Run MLPerf ResNet CPU inference benchmark via CM - -```bash -cm run script --tags=run-mlperf,inference,_performance-only,_short \ - --division=open \ - --category=edge \ - --device=cpu \ - --model=resnet50 \ - --precision=float32 \ - --implementation=mlcommons-python \ - --backend=onnxruntime \ - --scenario=Offline \ - --execution_mode=test \ - --power=no \ - --adr.python.version_min=3.8 \ - --clean \ - --compliance=no \ - --quiet \ - --time -``` - -### Run MLPerf BERT CUDA inference benchmark v4.1 via CM - -```bash -cmr "run-mlperf inference _find-performance _full _r4.1" \ - --model=bert-99 \ - --implementation=nvidia \ - --framework=tensorrt \ - --category=datacenter \ - --scenario=Offline \ - --execution_mode=test \ - --device=cuda \ - --docker \ - --docker_cm_repo=mlcommons@cm4mlops \ - --docker_cm_repo_flags="--branch=mlperf-inference" \ - --test_query_count=100 \ - --quiet -``` - -### Run MLPerf SDXL reference inference benchmark v4.1 via CM - -```bash -cm run script \ - --tags=run-mlperf,inference,_r4.1 \ - --model=sdxl \ - --implementation=reference \ - --framework=pytorch \ - --category=datacenter \ - --scenario=Offline \ - --execution_mode=valid \ - --device=cuda \ - --quiet -``` +## News +* [Upcoming Changes](https://github.com/mlcommons/cm4mlops/discussions/categories/announcements) +* [Ongoing Discussions](https://github.com/mlcommons/cm4mlops/discussions/categories/ideas) ## License diff --git a/VERSION b/VERSION new file mode 100644 index 0000000000..88dbf46f41 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.3.25 diff --git a/automation/script/module.py b/automation/script/module.py index 9494e7a39a..7368349c0d 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -8,7 +8,6 @@ # TBD: when we have bandwidth and resources, we should refactor it # and make it cleaner and simpler while keeping full backwards compatibility. 
# - import os import logging @@ -413,10 +412,6 @@ def _run(self, i): ignore_script_error = i.get('ignore_script_error', False) - # Get constant env and state - const = i.get('const',{}) - const_state = i.get('const_state',{}) - # Detect current path and record in env for further use in native scripts current_path = os.path.abspath(os.getcwd()) r = _update_env(env, 'CM_TMP_CURRENT_PATH', current_path) @@ -838,8 +833,8 @@ def _run(self, i): script_artifact_env = meta.get('env',{}) env.update(script_artifact_env) - - + script_artifact_state = meta.get('state',{}) + utils.merge_dicts({'dict1':state, 'dict2':script_artifact_state, 'append_lists':True, 'append_unique':True}) @@ -853,7 +848,7 @@ def _run(self, i): # STEP 700: Overwrite env with keys from the script input (to allow user friendly CLI) - # IT HAS THE PRIORITY OVER meta['default_env'] and meta['env'] + # IT HAS THE PRIORITY OVER meta['default_env'] and meta['env'] but not over the meta from versions/variations # (env OVERWRITE - user enforces it from CLI) # (it becomes const) if input_mapping: @@ -866,7 +861,9 @@ def _run(self, i): # update_env_from_input_mapping(const, i, docker_input_mapping) - + # Update env/state with const + env.update(const) + utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) @@ -882,7 +879,7 @@ def _run(self, i): variations = script_artifact.meta.get('variations', {}) state['docker'] = meta.get('docker', {}) - r = self._update_state_from_variations(i, meta, variation_tags, variations, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose) + r = self._update_state_from_variations(i, meta, variation_tags, variations, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose) if r['return'] > 0: return r @@ -952,7 +949,7 @@ def _run(self, i): if version!='' and version in versions: versions_meta = versions[version] - r = update_state_from_meta(versions_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) + r = update_state_from_meta(versions_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) if r['return']>0: return r adr=get_adr(versions_meta) if adr: @@ -973,7 +970,19 @@ def _run(self, i): if state.get('docker'): if str(state['docker'].get('run', True)).lower() in ['false', '0', 'no']: logging.info(recursion_spaces+' - Skipping script::{} run as we are inside docker'.format(found_script_artifact)) - return {'return': 0} + + # restore env and state + for k in list(env.keys()): + del(env[k]) + for k in list(state.keys()): + del(state[k]) + + env.update(saved_env) + state.update(saved_state) + + rr = {'return':0, 'env':env, 'new_env':{}, 'state':state, 'new_state':{}, 'deps': []} + return rr + elif str(state['docker'].get('real_run', True)).lower() in ['false', '0', 'no']: logging.info(recursion_spaces+' - Doing fake run for script::{} as we are inside docker'.format(found_script_artifact)) fake_run = True @@ -1328,7 +1337,7 @@ def _run(self, i): if default_version in versions: versions_meta = versions[default_version] - r = update_state_from_meta(versions_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) + r = 
update_state_from_meta(versions_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) if r['return']>0: return r if "add_deps_recursive" in versions_meta: @@ -1374,7 +1383,6 @@ def _run(self, i): r = update_env_with_values(env) if r['return']>0: return r - # Clean some output files clean_tmp_files(clean_files, recursion_spaces) @@ -1451,8 +1459,12 @@ def _run(self, i): elif pip_version_max != '': pip_version_string = '<='+pip_version_max + env.update(const) + utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) + r = _update_env(env, 'CM_TMP_PIP_VERSION_STRING', pip_version_string) if r['return']>0: return r + if pip_version_string != '': logging.debug(recursion_spaces+' # potential PIP version string (if needed): '+pip_version_string) @@ -1462,10 +1474,6 @@ def _run(self, i): logging.debug(recursion_spaces+' - Running preprocess ...') - # Update env and state with const - utils.merge_dicts({'dict1':env, 'dict2':const, 'append_lists':True, 'append_unique':True}) - utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) - run_script_input['run_state'] = run_state ii = copy.deepcopy(customize_common_input) @@ -1916,7 +1924,7 @@ def _dump_version_info_for_script(self, output_dir = os.getcwd(), quiet = False, return {'return': 0} ###################################################################################### - def _update_state_from_variations(self, i, meta, variation_tags, variations, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose): + def _update_state_from_variations(self, i, meta, variation_tags, variations, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose): # Save current explicit variations import copy @@ -2019,7 +2027,7 @@ def _update_state_from_variations(self, i, meta, variation_tags, variations, env if variation_tag_dynamic_suffix: self._update_variation_meta_with_dynamic_suffix(variation_meta, variation_tag_dynamic_suffix) - r = update_state_from_meta(variation_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) + r = update_state_from_meta(variation_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) if r['return']>0: return r if variation_meta.get('script_name', '')!='': @@ -2050,7 +2058,7 @@ def _update_state_from_variations(self, i, meta, variation_tags, variations, env combined_variation_meta = variations[combined_variation] - r = update_state_from_meta(combined_variation_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) + r = update_state_from_meta(combined_variation_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i) if r['return']>0: return r adr=get_adr(combined_variation_meta) @@ -2416,10 +2424,90 @@ def test(self, i): alias = meta.get('alias','') uid = meta.get('uid','') - if console: logging.info(path) - logging.info(' Test: TBD') + test_config = meta.get('tests', '') + if test_config: + logging.info(test_config) + variations = meta.get("variations") 
+ tags_string = ",".join(meta.get("tags")) + test_input_index = i.get('test_input_index') + test_input_id = i.get('test_input_id') + run_inputs = i.get("run_inputs", test_config.get('run_inputs', [ {"docker_os": "ubuntu", "docker_os_version":"22.04"} ])) + if test_input_index: + index_plus = False + try: + if test_input_index.endswith("+"): + input_index = int(test_input_index[:-1]) + index_plus = True + else: + input_index = int(test_input_index) + except ValueError as e: + print(e) + return {'return': 1, 'error': f'Invalid test_input_index: {test_input_index}. Must be an integer or an integer followed by a +'} + if input_index > len(run_inputs): + run_inputs = [] + else: + if index_plus: + run_inputs = run_inputs[input_index-1:] + else: + run_inputs = [ run_inputs[input_index - 1] ] + + for run_input in run_inputs: + if test_input_id: + if run_input.get('id', '') != test_input_id: + continue + + + ii = {'action': 'run', + 'automation':'script', + 'quiet': i.get('quiet'), + } + test_all_variations = run_input.get('test-all-variations', False) + if test_all_variations: + run_variations = [ f"_{v}" for v in variations if variations[v].get('group', '') == '' and str(variations[v].get('exclude-in-test', '')).lower() not in [ "1", "true", "yes" ] ] + else: + given_variations = run_input.get('variations_list', []) + if given_variations: + v_split = [] + run_variations = [] + for idx, v in enumerate(given_variations): + v_split = v.split(",") + for t in v_split: + if not t.startswith("_"): + given_variations[idx] = f"_{t}" #variations must begin with _. We support both with and without _ in the meta + if v_split: + run_variations.append(",".join(v_split)) + else: + run_variations = [ "" ] #run the test without any variations + use_docker = run_input.get('docker', False) + for key in run_input:#override meta with any user inputs like for docker_cm_repo + if i.get(key, '') != '': + if type(run_input[key]) == dict: + utils.merge_dicts({'dict1': run_input[key] , 'dict2':i[key], 'append_lists':True, 'append_unique':True}) + else: + run_input[key] = i[key] + ii = {**ii, **run_input} + i_env = ii.get('env', i.get('env', {})) + if use_docker: + ii['action'] = "docker" + for key in i: + if key.startswith("docker_"): + ii[key] = i[key] + + if ii.get('docker_image_name', '') == '': + ii['docker_image_name'] = alias + + for variation_tags in run_variations: + run_tags = f"{tags_string},{variation_tags}" + ii['tags'] = run_tags + if i_env: + import copy + ii['env'] = copy.deepcopy(i_env) + logging.info(ii) + r = self.cmind.access(ii) + if r['return'] > 0: + return r return {'return':0, 'list': lst} @@ -3012,8 +3100,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a 'remembered_selections': remembered_selections, 'env':env, 'state':state, - 'const':const, - 'const_state':const_state, + 'const':copy.deepcopy(const), + 'const_state':copy.deepcopy(const_state), 'add_deps_recursive':add_deps_recursive, 'debug_script_tags':debug_script_tags, 'verbose':verbose, @@ -3040,6 +3128,11 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a r = update_env_with_values(env) if r['return']>0: return r + # Update env/state with const + env.update(const) + utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) + + return {'return': 0} ############################################################################## @@ -3885,8 +3978,11 @@ def parse_version(self, i): return r string = r['string'] - - version = 
r['match'].group(group_number) + + if r['match'].lastindex and r['match'].lastindex >= group_number: + version = r['match'].group(group_number) + else: + return {'return':1, 'error': 'Invalid version detection group number. Version was not detected. Last index of match = {}. Given group number = {}'.format(r['match'].lastindex, group_number)} which_env[env_key] = version which_env['CM_DETECTED_VERSION'] = version # to be recorded in the cache meta @@ -3914,6 +4010,19 @@ def update_deps(self, i): return {'return':0} + ############################################################################## + def update_state_from_meta(self, meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys, new_state_keys, i): + """ + Updates state and env from meta + Args: + """ + + r = update_state_from_meta(meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys, new_state_keys, i) + if r['return']>0: + return r + + return {'return':0} + ############################################################################## def get_default_path_list(self, i): default_path_env_key = i.get('default_path_env_key', '') @@ -4418,7 +4527,7 @@ def update_env_with_values(env, fail_on_not_found=False, extra_env={}): # Check cases such as --env.CM_SKIP_COMPILE if type(value)==bool: - env[key] = str(value) + env[key] = value continue tmp_values = re.findall(r'<<<(.*?)>>>', str(value)) @@ -5110,7 +5219,7 @@ def update_env_from_input_mapping(env, inp, input_mapping): env[input_mapping[key]] = inp[key] ############################################################################## -def update_state_from_meta(meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys, new_state_keys, i): +def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys, new_state_keys, i): """ Internal: update env and state from meta """ @@ -5118,12 +5227,23 @@ def update_state_from_meta(meta, env, state, deps, post_deps, prehook_deps, post default_env = meta.get('default_env',{}) for key in default_env: env.setdefault(key, default_env[key]) + update_env = meta.get('env', {}) env.update(update_env) + update_const = meta.get('const', {}) + if update_const: + const.update(update_const) + env.update(const) + update_state = meta.get('state', {}) utils.merge_dicts({'dict1':state, 'dict2':update_state, 'append_lists':True, 'append_unique':True}) + update_const_state = meta.get('const_state', {}) + if const_state: + utils.merge_dicts({'dict1':const_state, 'dict2':update_const_state, 'append_lists':True, 'append_unique':True}) + utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True}) + new_deps = meta.get('deps', []) if len(new_deps)>0: append_deps(deps, new_deps) @@ -5152,9 +5272,17 @@ def update_state_from_meta(meta, env, state, deps, post_deps, prehook_deps, post r4 = update_deps(posthook_deps, add_deps_info, True, env) if r1['return']>0 and r2['return']>0 and r3['return'] > 0 and r4['return'] > 0: return r1 + # i would have 'input' when called through cm.access + input_update_env = i.get('input', i) + input_mapping = meta.get('input_mapping', {}) if input_mapping: - update_env_from_input_mapping(env, i['input'], input_mapping) + update_env_from_input_mapping(env, input_update_env, input_mapping) + + # handle dynamic env values + r = update_env_with_values(env) + if r['return']>0: + return r # Possibly restrict this to within docker environment 
add_deps_info = meta.get('ad', i.get('ad', {})) #we need to see input here diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index b68d90e279..400021594b 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1086,7 +1086,9 @@ def doc(i): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -def update_path_for_docker(path, mounts, force_path_target=''): +# This function takes in a host path and returns the absolute path on host and the container +# If mounts is passed, the function appends the host path and the container path to mounts in the form "host_path:container_path" +def update_path_for_docker(path, mounts=None, force_path_target=''): path_orig = '' path_target = '' @@ -1114,14 +1116,14 @@ def update_path_for_docker(path, mounts, force_path_target=''): x = path_orig + ':' + path_target # CHeck if no duplicates - to_add = True - for y in mounts: - if y.lower()==x.lower(): - to_add = False - break - - if to_add: - mounts.append(x) + if mounts != None: + to_add = True + for y in mounts: + if y.lower()==x.lower(): + to_add = False + break + if to_add: + mounts.append(x) return (path_orig, path_target) @@ -1393,6 +1395,8 @@ def dockerfile(i): env=i.get('env', {}) state = i.get('state', {}) + const=i.get('const', {}) + const_state = i.get('const_state', {}) script_automation = i['self_module'] dockerfile_env=i.get('dockerfile_env', {}) @@ -1420,7 +1424,7 @@ def dockerfile(i): state['docker'] = docker_settings add_deps_recursive = i.get('add_deps_recursive', {}) - r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False) + r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, const, const_state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False) if r['return'] > 0: return r @@ -1470,6 +1474,8 @@ def dockerfile(i): run_cmd = r['run_cmd_string'] cm_repo = i.get('docker_cm_repo', docker_settings.get('cm_repo', 'mlcommons@cm4mlops')) + cm_repo_branch = i.get('docker_cm_repo_branch', docker_settings.get('cm_repo_branch', 'mlperf-inference')) + cm_repo_flags = i.get('docker_cm_repo_flags', docker_settings.get('cm_repo_flags', '')) docker_base_image = i.get('docker_base_image', docker_settings.get('base_image')) @@ -1542,6 +1548,7 @@ def dockerfile(i): 'automation': 'script', 'tags': 'build,dockerfile', 'cm_repo': cm_repo, + 'cm_repo_branch': cm_repo_branch, 'cm_repo_flags': cm_repo_flags, 'docker_base_image': docker_base_image, 'docker_os': docker_os, @@ -1612,8 +1619,11 @@ def get_container_path(value): new_path_split1 = new_path_split + path_split[repo_entry_index:repo_entry_index+3] new_path_split2 = new_path_split + path_split[repo_entry_index:] return "/".join(new_path_split1), "/".join(new_path_split2) + else: + orig_path,target_path = update_path_for_docker(path=value) + return target_path, target_path - return value, value + # return value, value ############################################################ @@ -1682,7 +1692,7 @@ def docker(i): env=i.get('env', {}) noregenerate_docker_file = i.get('docker_noregenerate', False) - norecreate_docker_image 
= i.get('docker_norecreate', False) + norecreate_docker_image = i.get('docker_norecreate', True) if i.get('docker_skip_build', False): noregenerate_docker_file = True @@ -1738,6 +1748,8 @@ def docker(i): env['CM_RUN_STATE_DOCKER'] = False script_automation = i['self_module'] state = i.get('state', {}) + const = i.get('const', {}) + const_state = i.get('const_state', {}) tags_split = i.get('tags', '').split(",") variation_tags = [ t[1:] for t in tags_split if t.startswith("_") ] @@ -1790,7 +1802,11 @@ def docker(i): state['docker'] = docker_settings add_deps_recursive = i.get('add_deps_recursive', {}) - r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False) + r = script_automation.update_state_from_meta(meta, env, state, const, const_state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys = [], new_state_keys = [], i = i) + if r['return'] > 0: + return r + + r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, const, const_state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False) if r['return'] > 0: return r @@ -1904,11 +1920,12 @@ def docker(i): mounts[index] = new_host_mount+":"+new_container_mount if host_env_key: container_env_string += " --env.{}={} ".format(host_env_key, container_env_key) - # check if the below lines are needed when inputs are mapped to container paths - '''for v in docker_input_mapping: + + for v in docker_input_mapping: if docker_input_mapping[v] == host_env_key: - i[v] = container_env_key - ''' + i[v] = container_env_key + i_run_cmd[v] = container_env_key + mounts = list(filter(lambda item: item is not None, mounts)) mount_string = "" if len(mounts)==0 else ",".join(mounts) @@ -1969,6 +1986,8 @@ def docker(i): device = i.get('docker_device', docker_settings.get('device')) + image_name = i.get('docker_image_name', docker_settings.get('image_name', '')) + r = check_gh_token(i, docker_settings, quiet) if r['return'] >0 : return r gh_token = r['gh_token'] @@ -2040,7 +2059,7 @@ def docker(i): 'image_repo': image_repo, 'interactive': interactive, 'mounts': mounts, - 'image_name': i.get('docker_image_name', ''), + 'image_name': image_name, # 'image_tag': script_alias, 'image_tag_extra': image_tag_extra, 'detached': detached, diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000000..baed31eea3 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,135 @@ + +# Getting Started with CM Script Automation + +## Running CM Scripts + +To execute a simple script in CM that captures OS details, use the following command: + +```bash +cm run script --tags=detect,os -j +``` + +This command gathers details about the system on which it's run, such as: + +```json +{ + "CM_HOST_OS_TYPE": "linux", + "CM_HOST_OS_BITS": "64", + "CM_HOST_OS_FLAVOR": "ubuntu", + "CM_HOST_OS_FLAVOR_LIKE": "debian", + "CM_HOST_OS_VERSION": "24.04", + "CM_HOST_OS_KERNEL_VERSION": "6.8.0-45-generic", + "CM_HOST_OS_GLIBC_VERSION": "2.39", + "CM_HOST_OS_MACHINE": "x86_64", + "CM_HOST_OS_PACKAGE_MANAGER": "apt", + "CM_HOST_OS_PACKAGE_MANAGER_INSTALL_CMD": 
"DEBIAN_FRONTEND=noninteractive apt-get install -y", + "CM_HOST_OS_PACKAGE_MANAGER_UPDATE_CMD": "apt-get update -y", + "+CM_HOST_OS_DEFAULT_LIBRARY_PATH": [ + "/usr/local/lib/x86_64-linux-gnu", + "/lib/x86_64-linux-gnu", + "/usr/lib/x86_64-linux-gnu", + "/usr/lib/x86_64-linux-gnu64", + "/usr/local/lib64", + "/lib64", + "/usr/lib64", + "/usr/local/lib", + "/lib", + "/usr/lib", + "/usr/x86_64-linux-gnu/lib64", + "/usr/x86_64-linux-gnu/lib" + ], + "CM_HOST_PLATFORM_FLAVOR": "x86_64", + "CM_HOST_PYTHON_BITS": "64", + "CM_HOST_SYSTEM_NAME": "intel-spr-i9" +} +``` + +For more details on CM scripts, see the [CM documentation](index.md). + +### Adding New CM Scripts + +CM aims to provide lightweight connectors between existing automation scripts and tools without substituting them. You can add your own scripts to CM with the following command, which creates a script named `hello-world`: + +```bash +cm add script hello-world --tags=hello-world,display,test +``` + +This command initializes a CM script in the local repository with the following structure: + +``` +└── CM + ├── index.json + ├── repos + │ ├── local + │ │ ├── cfg + │ │ ├── cache + │ │ ├── cmr.yaml + │ │ └── script + │ │ └── hello-world + │ │ ├── _cm.yaml + │ │ ├── customize.py + │ │ ├── README-extra.md + │ │ ├── run.bat + │ │ └── run.sh + │ └── mlcommons@cm4mlops + └── repos.json +``` + +You can also execute the script from Python as follows: + +```python +import cmind +output = cmind.access({'action':'run', 'automation':'script', 'tags':'hello-world,display,test'}) +if output['return'] == 0: + print(output) +``` + +If you discover that your new script is similar to an existing script in any CM repository, you can clone an existing script using the following command: + +```bash +cm copy script .: +``` + +Here, `` is the name of the existing script, and `` is the name of the new script you're creating. Existing script names in the `cm4mlops` repository can be found [here](https://github.com/mlcommons/cm4mlops/tree/mlperf-inference/script). + +## Caching and Reusing CM Script Outputs + +By default, CM scripts run in the current directory and record all new files there. For example, a universal download script might download an image to the current directory: + +```bash +cm run script --tags=download,file,_wget --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e +``` + +To cache and reuse the output of scripts, CM offers a `cache` automation feature similar to `script`. When `"cache":true` is specified in a script's metadata, CM will create a `cache` directory in `$HOME/CM/repos/local` with a unique ID and the same tags as `script`, and execute the script there. + +Subsequent executions of the same script will reuse files from the cache, avoiding redundancy. This is especially useful for large files or data sets. + +You can manage cache entries and find specific ones using commands like: + +```bash +cm show cache +cm show cache --tags=get,ml-model,resnet50,_onnx +cm find cache --tags=download,file,ml-model,resnet50,_onnx +cm info cache --tags=download,file,ml-model,resnet50,_onnx +``` + +To clean cache entries: + +```bash +cm rm cache --tags=ml-model,resnet50 +cm rm cache -f # Clean all entries +``` + +You can completely reset the CM framework by removing the `$HOME/CM` directory, which deletes all downloaded repositories and cached entries. + +## Integration with Containers + +CM scripts are designed to run natively or inside containers with the same commands. 
You can substitute `cm run script` with `cm docker script` to execute a script inside an automatically-generated container: + +```bash +cm docker script --tags=python,app,image-classification,onnx,_cpu +``` + +CM automatically handles the generation of Dockerfiles, building of containers, and execution within containers, providing a seamless experience whether running scripts natively or in containers. + +This approach simplifies the development process by eliminating the need for separate Dockerfile maintenance and allows for the use of native scripts and workflows directly within containers. diff --git a/docs/index.md b/docs/index.md index 175d04c0d4..9a74cd2b34 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,7 @@ Please check the [CM documentation](https://docs.mlcommons.org/ck) for more deta See the [automatically generated catalog](scripts/index.md) of all CM scripts from MLCommons. -## Getting started with CM scripts +## Understanding CM scripts * A CM script is identified by a set of tags and by unique ID. * Further each CM script can have multiple variations and they are identified by variation tags which are treated in the same way as tags and identified by a `_` prefix. @@ -69,8 +69,8 @@ Sometimes it is difficult to add all variations needed for a script like say `ba * By using `--new` input, a new cache entry can be forced even when an old one exist. * By default no depndencies are run for a cached entry unless `dynamic` key is set for it. -### Updating ENV from inside the run script -* [TBD] + +Please see [here](getting-started.md) for trying CM scripts. diff --git a/mkdocs.yml b/mkdocs.yml index 7eda0cd8aa..4cb2956917 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,6 +20,7 @@ theme: - toc.follow nav: - HOME: index.md + - Getting Started: getting-started.md - CM Scripts: - scripts/index.md - Python-automation: scripts/Python-automation/index.md diff --git a/project/mlperf-inference-v3.0-submissions/README.md b/project/mlperf-inference-v3.0-submissions/README.md deleted file mode 100644 index 7ad8080b0c..0000000000 --- a/project/mlperf-inference-v3.0-submissions/README.md +++ /dev/null @@ -1,10 +0,0 @@ -Graphs: - https://cknowledge.org/cm-gui-graph/?tags=mlperf-inference,all,open,edge,image-classification,singlestream - https://cknowledge.org/cm-gui-graph/?tags=mlperf-inference,v3.0,open,edge,image-classification,singlestream&x=Result&y=Accuracy - - http://localhost:8501/?tags=mlperf-inference,v3.0,open,edge,image-classification,singlestream&x=Result&y=Accuracy - http://localhost:8501/?tags=mlperf-inference,all,open,edge,image-classification,singlestream&x=Result&y=Accuracy - -Local: - cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1 - cm run script "gui _graph" diff --git a/project/mlperf-inference-v3.0-submissions/_cm.json b/project/mlperf-inference-v3.0-submissions/_cm.json deleted file mode 100644 index 2cc81aa8b0..0000000000 --- a/project/mlperf-inference-v3.0-submissions/_cm.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "alias": "mlperf-inference-v3.0-submissions", - "automation_alias": "project", - "automation_uid": "6882553224164c56", - "tags": [], - "uid": "f571becbcbd44a7d" -} diff --git a/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md deleted file mode 100644 index 9aae9bbe55..0000000000 --- 
a/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md +++ /dev/null @@ -1,285 +0,0 @@ -# Crowd-benchmarking MLPerf BERT inference - -
-Click here to see the table of contents. - -* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) -* [System preparation](#system-preparation) - * [Minimal system requirements](#minimal-system-requirements) - * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) - * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) - * [Detect or install CUDA](#detect-or-install-cuda) - * [Test CUDA installation](#test-cuda-installation) - * [Install Python virtual environment](#install-python-virtual-environment) - * [Detect or install cuDNN](#detect-or-install-cudnn) - * [Detect or install TensorRT](#detect-or-install-tensorrt) - * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) - * [Try ONNX runtime backend](#try-onnx-runtime-backend) - * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) - * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) - * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) - * [Populate the README files](#populate-the-readme-files) - * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) - * [Push the results to GitHub repo](#push-the-results-to-github-repo) - * [Try PyTorch backend](#try-pytorch-backend) - * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) -* [The next steps](#the-next-steps) - -
- - -This is a pilot community project to collaboratively run MLPerf BERT inference benchmark -across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). -However, instead of searching for extraterrestrial intelligence, we are -searching for optimal software/hardware combination to run various AI and ML workloads -in terms of performance, accuracy, power and costs ... - -This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment -across continuously evolving software, hardware, models and data. - -*If you submit your results before 1pm PST on Friday 3rd, 2023, - they will be accepted for the official MLPerf inference v3.0 submission round - and your name acknowledged in the notes!* - - -# System preparation - -## Minimal system requirements - -* CPU: any x86-64 or Arm64 based machine -* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ -* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 -* Disk space: ~10GB -* Python: 3.8+ -* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) - -## Install CM (CK2) automation meta-framework - -Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) -(the 2nd generation on the Collective Mind framework) on your system. - -## Pull CM repository with portable automation recipes - -Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -supporting portable MLOps and DevOps: - -```bash -cm pull repo mlcommons@ck -``` - -CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. - -We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc) -required to run a given software project such as the MLPerf inference benchmark. - -These CM scripts simply wrap existing native scripts and tools as simple micro-services -with a human-readable CLI and simple Python API to be able to easily connect them together -and run on any platform in a unified way. - -## Detect or install CUDA - -Run the following CM script: -```bash -cm run script "get cuda" --out=json -``` - -If CUDA is automatically detected, it will be registered in the CM cache: -```bash -cm show cache --tags=get,cuda -``` - -Otherwise, this script will attempt to download and install the latest CUDA -from Nvidia website. - -Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). - -### Test CUDA installation - -You can test if CUDA toolkit and driver was detected or installed successfully using the following command: -```bash -cm run script "get cuda-devices" -``` - -You should see similar output: -```txt -Checking compiler version ... 
- -nvcc: NVIDIA (R) Cuda compiler driver -Copyright (c) 2005-2022 NVIDIA Corporation -Built on Wed_Sep_21_10:33:58_PDT_2022 -Cuda compilation tools, release 11.8, V11.8.89 -Build cuda_11.8.r11.8/compiler.31833905_0 - -Compiling program ... - -Running program ... - - - Running postprocess ... -GPU Device ID: 0 -GPU Name: Tesla K80 -GPU compute capability: 3.7 -CUDA driver version: 11.4 -CUDA runtime version: 11.8 -Global memory: 11997020160 -Max clock rate: 823.500000 MHz -Total amount of shared memory per block: 49152 -Total number of registers available per block: 65536 -Warp size: 32 -Maximum number of threads per multiprocessor: 2048 -Maximum number of threads per block: 1024 -Max dimension size of a thread block X: 1024 -Max dimension size of a thread block Y: 1024 -Max dimension size of a thread block Z: 64 -Max dimension size of a grid size X: 2147483647 -Max dimension size of a grid size Y: 65535 -Max dimension size of a grid size Z: 65535 - - - running time of script "get,cuda-devices": 4.16 sec. - -``` - -## Install Python virtual environment - -```bash -cm run script "get sys-utils-cm" --quiet - -cm run script "install python-venv" --name=mlperf-cuda -``` - -If you want to install specific version of Python use the following command: -```bash -cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda -``` - -## Detect or install cuDNN - -```bash -cm run script "get cudnn" -``` - -If cuDNN is not detected on your system, you can download a TAR file -from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script -to install it as follows: -```bash -cm run script "get cudnn" --tar_file= -``` - -We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. - -## Detect or install TensorRT - -```bash -cm run script "get tensorrt" -``` -If TensorRT is not detected on your system, you can download a TAR file -from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script -to install it as follows: -```bash -cm run script "get tensorrt" --tar_file= -``` - -We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. 
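As with CUDA above, you can optionally confirm that cuDNN and TensorRT were registered in the CM cache before running the benchmark (a sketch using the same `cm show cache` pattern shown earlier):

```bash
cm show cache --tags=get,cudnn
cm show cache --tags=get,tensorrt
```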
- - -## Run MLPerf inference benchmark with BERT - -### Try ONNX runtime backend - -#### Do a test run to detect and record the system performance - -```bash -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \ - --device=cuda --backend=onnxruntime --quiet -``` - -#### Do a full accuracy run for all the scenarios - -```bash -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=onnxruntime --quiet \ - --execution-mode=valid --results_dir=$HOME/inference_3.0_results -``` - -#### Do a full performance run for all the scenarios - -```bash -cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=onnxruntime --quiet \ - --execution-mode=valid --results_dir=$HOME/inference_3.0_results -``` - -#### Populate the README files - -```bash -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=onnxruntime --quiet \ - --execution-mode=valid --results_dir=$HOME/inference_3.0_results -``` - -#### Generate MLPerf submission tree - -We should use the master branch of MLCommons inference repo for the submission checker. -You can use `--hw_note_extra` option to add your name to the notes. - -```bash -cm run script --tags=generate,inference,submission \ - --results_dir=$HOME/inference_3.0_results/valid_results \ - --adr.python.name=mlperf-cuda \ - --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \ - --run-checker --submitter=cTuning --adr.inference-src.version=master - --hw_notes_extra="Result taken by " --quiet -``` - -#### Push the results to GitHub repo - -First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0). -Then run the following command after replacing `--repo_url` with your fork URL. - -```bash -cm run script --tags=push,github,mlperf,inference,submission \ - --submission_dir=$HOME/inference_submission_tree \ - --adr.python.name=mlperf-cuda \ - --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \ - --commit_message="Bert crowd-results added" -``` - -Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) - - - -### Try PyTorch backend - -You can run the same commands with PyTorch by rerunning all above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`. 
- -For example, - -```bash -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=pytorch --execution-mode=valid \ - --results_dir=$HOME/inference_3.0_results --quiet -``` - - -## Test composable ML benchmark with other models, data sets, frameworks and platforms - -* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui) -* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph) - - -# The next steps - -Feel free to join our [open taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -and the public [Discord server](https://discord.gg/JjWNWXKxwT) to learn about our roadmap and related community projects. - -Our ultimate goal is to help anyone automatically find or generate the optimal software/hardware stack from the cloud to the edge -for their AI/ML tasks based on their requrements and constraints (accuracy, performance, power consumption, costs, etc). - -*Prepared by [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh) and [Grigori Fursin](https://cKnowledge.org/gfursin) (OctoML, MLCommons, cTuning foundation)* diff --git a/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md b/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md deleted file mode 100644 index 824279732e..0000000000 --- a/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md +++ /dev/null @@ -1,87 +0,0 @@ -## Setup -Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. -Download the ck repo to get the CM script for MLPerf submission -``` -cm pull repo mlcommons@ck -``` -## Run Commands - -Bert has two variants - `bert-99` and `bert-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. `bert-99.9` model is applicable only on a datacenter system. - -On edge category `bert-99` has Offline and SingleStream scenarios and in datacenter category both `bert-99` and `bert-99.9` have Offline and Server scenarios. The below commands are assuming an edge category system. 
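For a datacenter category system, the commands below can presumably be adapted by adding `--category=datacenter` (see the notes below) and, where required, switching to `bert-99.9`; an illustrative test run:

```bash
cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
--model=bert-99.9 --implementation=reference --device=cpu --backend=onnxruntime \
--category=datacenter --quiet
```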
- -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for a closed division including the compliance tests -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy run for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -#### Do a full performance run for all the scenarios -``` -cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -#### Generate actual submission tree - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` - -#### Push the results to GitHub repo - -First create a fork of [this repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/). Then run the following command after replacing `--repo_url` with your fork URL. -``` -cm run script --tags=push,github,mlperf,inference,submission \ ---submission_dir=$HOME/inference_submission_tree \ ---repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0/ \ ---commit_message="Bert results added" -``` - -Create a PR to [cTuning repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) - -## Tensorflow backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ ---results_dir=$HOME/inference_3.0_results --quiet -``` - -## Pytorch backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - diff --git a/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md b/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md deleted file mode 100644 index 9129004321..0000000000 --- a/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md +++ /dev/null @@ -1,74 +0,0 @@ -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - -On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. - -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_full,_all-scenarios --model=resnet50 \ ---device=cpu --backend=onnxruntime --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for a closed division including the compliance tests -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy run for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \ ---implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -#### Do a full performance run for all the scenarios -``` -cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios --model=resnet50 --device=cpu \ ---implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios --model=resnet50 --device=cpu \ ---implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -#### Generate actual submission tree - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---submission_dir=$HOME/inference_submission_tree --clean \ ---run-checker --submitter=cTuning --adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` - -#### Push the results to GitHub repo - -First create a fork of [this repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/). Then run the following command after replacing `--repo_url` with your fork URL. 
-``` -cm run script --tags=push,github,mlperf,inference,submission --submission_dir=$HOME/inference_submission_tree \ ---repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0/ \ ---commit_message="ResNet50 results added" -``` - -Create a PR to [cTuning repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) - -## Tensorflow backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \ ---implementation=reference --backend=tf --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - -## TVM backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \ ---implementation=reference --backend=tvm-onnx --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` diff --git a/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md b/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md deleted file mode 100644 index c35aada995..0000000000 --- a/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md +++ /dev/null @@ -1,47 +0,0 @@ -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - - -Requirements: You need to have CUDA, cuDNN and TensorRT installed on your system. - -If CUDA is not detected, CM should download and install it automatically when you run the workflow. - -For x86 machines, you can download the tar files for cuDNN and TensorRT and install them using the following commands -```bash -cm run script --tags=get,cudnn --tar_file= -``` - -```bash -cm run script --tags=get,tensorrt --tar_file= -``` - -On other systems you can do a package manager install and then CM should pick up the installation automatically during the workflow run. - -Nvidia run configuration values for each model-sceraio for known systems are stored in `__init__.py` files under configs directory. For custom systems these are stored under `custom.py` files. When custom config files are generated they override the default config values with empty ones (not desirable). So, you'll probably need to open the custom config file and comment out the overrides. Typically `gpu_batch_size` and `offline_expected_qps` are enough for an offline scenario run on a typical single GPU system. 
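Before building the Nvidia inference server, you can optionally confirm that CUDA, cuDNN and TensorRT are visible to CM by re-running the corresponding detection scripts (a sketch; these are the same scripts used for the tar-file installs above and should simply detect an existing installation):

```bash
cm run script --tags=get,cuda
cm run script --tags=get,cudnn
cm run script --tags=get,tensorrt
```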
- - -## Build Nvidia Inference Server -``` -cm run script --tags=build,nvidia,inference,server -``` - -## Run ResNet50 - -### Find SUT performance - -``` -cm run script --tags=generate,run-cmds,inference,_find-performance --model=resnet50 --implementation=nvidia-original \ ---device=cuda --adr.nvidia-harness.gpu_batch_size=64 --results_dir=$HOME/nvidia_original_results -``` - -### Do a complete submission run - -``` -cm run script --tags=generate,run-cmds,inference,_submission,_full --execution_mode=valid --model=resnet50 \ ---implementation=nvidia-original --device=cuda --adr.nvidia-harness.gpu_batch_size=64 \ ---adr.nvidia-harness.skip_preprocess=yes --adr.nvidia-harness.make_cmd=run_harness \ ---results_dir=$HOME/nvidia_original_results --submission_dir=$HOME/nvidia_original_submissions \ ---division=open --submitter=cTuning --category=edge -``` - diff --git a/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md b/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md deleted file mode 100644 index cb74086b54..0000000000 --- a/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md +++ /dev/null @@ -1,25 +0,0 @@ -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - -## Create an AWS Graviton Instance - -``` -cd $HOME/CM/repos/mlcommon@ck/cm-mlops/script/run-terraform/aws/ -cp credentials.example credentials.sh -``` -Update `credentials.sh` with your AWS Key, Secret and Token - -``` -cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \ ---cminit --key_file=$HOME/cmuser.pem -``` - -The above command will output the IP of the created instance which will be having CM setup already done - -Copy the imagenet dataset to the created instance. 
For example, - -``` -rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: -``` - diff --git a/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd b/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd deleted file mode 100644 index 87fa4e9ba2..0000000000 --- a/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd +++ /dev/null @@ -1,3 +0,0 @@ -cm run script "get git repo _repo.https://github.com/ctuning/mlperf_inference_submissions_v3.0" --extra_cache_tags=mlperf-inference-results,version-3.0 -cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1 -cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.0" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.0 diff --git a/pyproject.toml b/pyproject.toml index 3eea978b4f..c05abc8ab1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools>=60", "wheel", "cmind @ git+https://git@github.com/mlcommons/ck.git@a4c6a7b477af5f1e7099c55f5468a47854adaa6c#egg=cmind&subdirectory=cm"] +requires = ["setuptools>=60", "wheel", "cmind"] diff --git a/script/app-image-classification-onnx-py/_cm.yaml b/script/app-image-classification-onnx-py/_cm.yaml index 2e2241a07e..740a8a18ab 100644 --- a/script/app-image-classification-onnx-py/_cm.yaml +++ b/script/app-image-classification-onnx-py/_cm.yaml @@ -106,7 +106,7 @@ input_description: docker: skip_run_cmd: 'no' skip_cm_sys_upgrade: 'yes' - cm_repo_flags: '--checkout=dev' + cm_repo_flags: '--branch=dev' use_host_group_id: 'yes' image_tag_extra: '-cm-dev' input_paths: diff --git a/script/app-mlperf-inference-amd/_cm.yaml b/script/app-mlperf-inference-amd/_cm.yaml index 78ecad9f88..305578a17a 100644 --- a/script/app-mlperf-inference-amd/_cm.yaml +++ b/script/app-mlperf-inference-amd/_cm.yaml @@ -146,6 +146,12 @@ variations: CM_MLPERF_DEVICE: gpu CM_MLPERF_DEVICE_LIB_NAMESPEC: cudart + rocm: + group: device + env: + CM_MLPERF_DEVICE: rocm + CM_MLPERF_DEVICE_LIB_NAMESPEC: rocm + openshift: group: backend default: true @@ -161,6 +167,10 @@ variations: deps: - tags: get,generic-python-lib,_torch_cuda + pytorch,rocm: + deps: + - tags: get,generic-python-lib,_torch,_rocm + pytorch,cpu: deps: - tags: get,generic-python-lib,_torch @@ -243,9 +253,16 @@ variations: llama2-70b_: deps: + - tags: get,generic-python-lib,_package.compressed_tensors + names: + - compressed_tensors - tags: get,preprocessed,dataset,openorca,_mlc,_validation - - tags: get,ml-model,llama2,_fp32,_pytorch - tags: get,ml-model,llama2,_amd,_pytorch + skip_if_env: + CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST: + - 'yes' + CM_RUN_STATE_DOCKER: + - 'yes' - tags: get,preprocessed,dataset,openorca,_mlc,_validation - tags: download,file,_url.https://github.com/vllm-project/vllm/blob/38c4b7e863570a045308af814c72f4504297222e/tests/fp8_kv/llama2-70b-fp8-kv/kv_cache_scales.json extra_cache_tags: llama2-scales,kv-cache @@ -255,8 +272,8 @@ variations: - tags: get,generic-python-lib,_package.vllm names: - vllm - - tags: get,git,repo,_repo.https://github.com/mlcommons/submissions_inference_v4.1 - extra_cache_tags: inference,submissions + - tags: get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only + extra_cache_tags: inference,results env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_INFERENCE_RESULTS_PATH @@ -315,5 
+332,10 @@ variations: env: CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.0 + r4.1_default: + group: version + env: + CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.1 + docker: real_run: False diff --git a/script/app-mlperf-inference-amd/run-llama2.sh b/script/app-mlperf-inference-amd/run-llama2.sh index 4692bfcc46..10f36f8ca2 100644 --- a/script/app-mlperf-inference-amd/run-llama2.sh +++ b/script/app-mlperf-inference-amd/run-llama2.sh @@ -5,8 +5,6 @@ set -xeu N_SAMPLES=${N_SAMPLES:-24576} #24576 #3072 #2457 #6 TP=1 DP=${DP:-8} -WD=${WD:-0} -SORTING=${SORTING:-descending} #ascending #descending #lexicographic #skip export HIP_FORCE_DEV_KERNARG=1 export VLLM_USE_TRITON_FLASH_ATTN=0 @@ -14,12 +12,11 @@ export VLLM_FP8_PADDING=1 export VLLM_FP8_ACT_PADDING=1 export VLLM_FP8_WEIGHT_PADDING=1 export VLLM_FP8_REDUCE_CONV=1 -export VLLM_SCHED_PREFILL_KVC_FREEPCT=31.0 export HARNESS_DISABLE_VLLM_LOGS=1 export VLLM_LOGGING_LEVEL=ERROR -MODEL_PATH=${CM_ML_MODEL_LLAMA2_FILE_WITH_PATH:-/data/llm/llama2-70b-chat/} +MODEL_PATH=${LLAMA2_CHECKPOINT_PATH:-/data/llm/llama2-70b-chat/} DATASET_PATH=${CM_DATASET_OPENORCA_PREPROCESSED_PATH:-/data/open_orca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz} QUANTIZED_WEIGHTS_PATH=${CM_LLAMA2_FINAL_SAFE_TENSORS_PATH:-quantized/quark_share/modelzoo/llama2_70b_wfp8_afp8_ofp8_nomerge/json-safetensors/llama.safetensors} QUANTIZATION_PARAM_PATH=${QUANTIZATION_PARAM_PATH:-/app/kv_cache_scales.json} @@ -33,8 +30,8 @@ LOG_DIR=${CM_MLPERF_OUTPUT_DIR} cp $USER_CONF ${LOG_DIR}/user.conf -cmd ="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py \ - --scenario ${CM_MLPERF_LOADGEN_SCENARIO \ +COMMON_CMD_OPTIONS="\ + --scenario ${CM_MLPERF_LOADGEN_SCENARIO} \ --output-log-dir ${LOG_DIR} \ --model-path $MODEL_PATH \ --mlperf-conf $MLPERF_CONF \ @@ -49,8 +46,31 @@ cmd ="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_O -dp ${DP} \ --quantization fp8 \ --quantized-weights-path ${QUANTIZED_WEIGHTS_PATH} \ - --quantization-param-path ${QUANTIZATION_PARAM_PATH} \ + --quantization-param-path ${QUANTIZATION_PARAM_PATH}" + +if [ "${CM_MLPERF_LOADGEN_MODE}" == "accuracy" ]; then + COMMON_CMD_OPTIONS+=" --accuracy" +fi + +if [ "${CM_MLPERF_LOADGEN_SCENARIO}" == "Offline" ]; then + WD=${WD:-0} + SORTING=${SORTING:-descending} #ascending #descending #lexicographic #skip + export VLLM_SCHED_PREFILL_KVC_FREEPCT=31.0 + # generate run command + cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_Offline.py \ + ${COMMON_CMD_OPTIONS} \ --warmup-duration ${WD} \ --sorting ${SORTING} \ --enforce-eager True \ --gpu-memory-utilization 0.99" +else + # generate run command + cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_AMD_LLAMA2_CODE_PATH}/mainVllmFp8_SyncServer.py \ + ${COMMON_CMD_OPTIONS} \ + --enable-warm-up \ + --enable-batcher" +fi + +echo "${cmd}" +# uncomment the below lines for testing +#eval "${cmd}" diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index 4f0cdbacec..0975f0b0b6 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -940,10 +940,10 @@ variations: names: - pip-package - accelerate - - tags: get,generic-python-lib,_package.torch,_path.https://download.pytorch.org/whl/nightly/cpu-cxx11-abi/torch-2.1.0.dev20230715%2Bcpu.cxx11.abi-cp39-cp39-linux_x86_64.whl - names: - - pip-package - - pip-torch + - tags: 
install,pytorch,from-src,_for-intel-mlperf-inference-v3.1-dlrm-v2 + names: + - pytorch + - torch dlrm-v2_: env: {} diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index b648dc54e0..0b9e2fd95d 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -394,6 +394,7 @@ deps: names: - ml-model - sdxl-model + - ml-model-float16 enable_if_env: CM_MODEL: - stable-diffusion-xl @@ -434,6 +435,9 @@ deps: skip_if_env: CM_MLPERF_CUSTOM_MODEL_PATH: - "on" + skip_if_env: + CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: + - 'yes' ## 3d-unet - tags: get,ml-model,medical-imaging,3d-unet @@ -555,6 +559,9 @@ deps: enable_if_env: CM_MODEL: - mixtral-8x7b + skip_if_env: + CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: + - 'yes' ## Kits19 for 3d-unet - tags: get,dataset,kits19,preprocessed @@ -919,6 +926,10 @@ variations: env: CM_MODEL: stable-diffusion-xl CM_NUM_THREADS: "1" + adr: + mlperf-implementation: + tags: _branch.dev + version: custom deps: - tags: get,generic-python-lib,_package.diffusers names: @@ -989,6 +1000,9 @@ variations: - tags: get,generic-python-lib,_package.more-itertools names: - more-itertools + - tags: get,generic-python-lib,_package.compressed_tensors + names: + - compressed_tensors llama2-70b-99: group: models diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index ca97bbf0d0..0272c516f2 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -70,15 +70,24 @@ def preprocess(i): x="" if os_info['platform'] == 'windows' else "'" - if "llama2-70b" in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"]: - env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf-conf " + x+ env['CM_MLPERF_CONF'] + x + + inference_src_version = env.get('CM_MLPERF_INFERENCE_SOURCE_VERSION', '') + version_tuple = None + if inference_src_version: + version_tuple = tuple(map(int, inference_src_version.split('.'))) + + if version_tuple and version_tuple >= (4,1,1): + pass # mlperf_conf is automatically loaded by the loadgen else: - env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x + if "llama2-70b" in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"]: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf-conf " + x+ env['CM_MLPERF_CONF'] + x + else: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x if env.get('CM_NETWORK_LOADGEN', '') != "lon" and env.get('CM_MLPERF_INFERENCE_API_SERVER','')=='' and "llama2-70b" not in env['CM_MODEL']: env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') if not env['MODEL_DIR']: - env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH'))) + env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH', ''))) RUN_CMD = "" state['RUN'] = {} @@ -281,6 +290,10 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio elif "stable-diffusion-xl" in env['CM_MODEL']: env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image") + if env.get('+PYTHONPATH', '') == '': + env['+PYTHONPATH'] = [] + env['+PYTHONPATH'].append(os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", "fid")) + backend = env['CM_MLPERF_BACKEND'] device = 
env['CM_MLPERF_DEVICE'] if env['CM_MLPERF_DEVICE'] not in [ "gpu", "rocm" ] else "cuda" max_batchsize = env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '1') @@ -291,11 +304,12 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio " --dataset-path " + env['CM_DATASET_PATH_ROOT'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'].replace("bfloat", "bf").replace("float", "fp") + \ " --device " + device + \ - " --max-batchsize " + max_batchsize + \ env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ " --model-path " + env['CM_ML_MODEL_PATH'] + if "--max-batchsize" not in cmd: + cmd += " --max-batchsize " + max_batchsize if env.get('CM_COCO2014_SAMPLE_ID_PATH','') != '': cmd += " --ids-path " + env['CM_COCO2014_SAMPLE_ID_PATH'] @@ -332,14 +346,15 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio device = env['CM_MLPERF_DEVICE'] if env['CM_MLPERF_DEVICE'] != "gpu" else "cuda" cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ - " --dataset-path " + env['CM_DATASET_PREPROCESSED_PATH'] + \ + " --dataset-path " + env['CM_DATASET_MIXTRAL_PREPROCESSED_PATH'] + \ " --device " + device.replace("cuda", "cuda:0") + \ env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ - " --model-path " + env['MODEL_DIR'] + " --model-path " + env['MIXTRAL_CHECKPOINT_PATH'] cmd = cmd.replace("--count", "--total-sample-count") + cmd = cmd.replace("--max-batchsize", "--batch-size") elif "3d-unet" in env['CM_MODEL']: diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml index 15d6e45193..9102732e31 100644 --- a/script/app-mlperf-inference-nvidia/_cm.yaml +++ b/script/app-mlperf-inference-nvidia/_cm.yaml @@ -89,7 +89,6 @@ input_mapping: embedding_weights_on_gpu_part: CM_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART sdxl_batcher_time_limit: CM_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT - # Dependencies on other CM scripts deps: @@ -119,6 +118,11 @@ deps: - enable_if_env: CM_MODEL: - resnet50 + skip_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + CM_RUN_STATE_DOCKER: + - 'yes' names: - imagenet-original tags: get,dataset,original,imagenet,_full @@ -218,6 +222,11 @@ deps: - enable_if_env: CM_MODEL: - retinanet + skip_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + CM_RUN_STATE_DOCKER: + - 'yes' names: - openimages-original tags: get,dataset,original,openimages,_validation,_full,_custom-annotations @@ -225,6 +234,11 @@ deps: - enable_if_env: CM_MODEL: - retinanet + skip_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + CM_RUN_STATE_DOCKER: + - 'yes' names: - openimages-calibration tags: get,dataset,original,openimages,_calibration @@ -242,6 +256,13 @@ deps: names: - nvidia-inference-common-code + - tags: pull,git,repo + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_CODE_CHANGES: + - 'yes' + # Creates user conf for given SUT - tags: generate,user-conf,mlperf,inference names: @@ -392,6 +413,7 @@ variations: CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: quantization, affine fusion CM_ML_MODEL_INPUTS_DATA_TYPE: int8 CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8 + CM_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS: 10 deps: - tags: get,generic-python-lib,_onnx-graphsurgeon version: 0.3.27 @@ -408,14 +430,13 @@ variations: 
CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8 deps: - tags: get,generic-python-lib,_Pillow - - tags: get,generic-python-lib,_torch - - tags: get,generic-python-lib,_torchvision - tags: get,generic-python-lib,_opencv-python - tags: get,generic-python-lib,_numpy - tags: get,generic-python-lib,_pycocotools - tags: get,generic-python-lib,_onnx-graphsurgeon - tags: get,generic-python-lib,_package.onnx - version: 1.13.1 + version: 1.14.1 + - tags: get,generic-python-lib,_package.sympy sdxl: new_env_keys: @@ -505,6 +526,7 @@ variations: - tags: get,generic-python-lib,_transformers - tags: get,generic-python-lib,_safetensors - tags: get,generic-python-lib,_onnx + - tags: get,generic-python-lib,_package.sympy - tags: get,generic-python-lib,_onnx-graphsurgeon bert-99: @@ -572,10 +594,10 @@ variations: CM_ML_MODEL_WEIGHTS_DATA_TYPE: fp16 deps: - tags: get,generic-python-lib,_toml - - tags: get,generic-python-lib,_torchvision + - tags: get,generic-python-lib,_torchvision_cuda names: - torchvision - - tags: get,generic-python-lib,_torch + - tags: get,generic-python-lib,_torch_cuda - tags: get,generic-python-lib,_nvidia-apex - tags: get,generic-python-lib,_unidecode - tags: get,generic-python-lib,_inflect @@ -591,7 +613,6 @@ variations: - CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH deps: - tags: get,dlrm,data,mlperf,inference,_nvidia - - tags: get,generic-python-lib,_torch - tags: get,generic-python-lib,_package.torchsnapshot - tags: get,generic-python-lib,_package.torchrec version: 0.3.2 @@ -740,6 +761,11 @@ variations: CM_MLPERF_NVIDIA_HARNESS_USE_TRITON: "yes" CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX3: "using_triton" + use-graphs: + group: graphs + env: + CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: "yes" + prebuild: group: run-mode env: @@ -917,6 +943,7 @@ variations: - bert-99.9 - dlrm-v2-99 - dlrm-v2-99.9 + - tags: reproduce,mlperf,inference,nvidia,harness,_calibrate inherit_variation_tags: true enable_if_env: @@ -1083,11 +1110,21 @@ variations: singlestream,resnet50: env: - SKIP_POLICIES: '1' + CM_MLPERF_NVIDIA_HARNESS_DISABLE_BETA1_SMALLK: yes + SKIP_POLICIES: '0' # skip_policies used to give better latency but is not working with 4.0 and later Nvidia codes + + server,resnet50: + env: + CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000 + CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: True + CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: True + CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: 9 + CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: 2 multistream,resnet50: env: - SKIP_POLICIES: '1' + CM_MLPERF_NVIDIA_HARNESS_DISABLE_BETA1_SMALLK: yes + SKIP_POLICIES: '0' singlestream,run_harness: default_variations: @@ -1170,6 +1207,10 @@ variations: default_variations: batch-size: batch_size.8 + gpu_memory.8,bert_,offline,run_harness: + default_variations: + batch-size: batch_size.256 + gpu_memory.16,bert_,offline,run_harness: default_variations: batch-size: batch_size.256 @@ -1194,6 +1235,12 @@ variations: default_variations: batch-size: batch_size.64 + gpu_memory.8,resnet50,offline,run_harness: + default_variations: + batch-size: batch_size.64 + env: + CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4" + gpu_memory.16,resnet50,offline,run_harness: default_variations: batch-size: batch_size.1024 @@ -1247,6 +1294,10 @@ variations: default_variations: batch-size: batch_size.2 + gpu_memory.8,retinanet,offline,run_harness: + default_variations: + batch-size: batch_size.2 + gpu_memory.16,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 @@ -1281,6 +1332,10 @@ variations: CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: 
"2" CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" + gpu_memory.8,rnnt,offline,run_harness: + default_variations: + batch-size: batch_size.1024 + gpu_memory.16,rnnt,offline,run_harness: default_variations: batch-size: batch_size.1024 @@ -1305,6 +1360,10 @@ variations: default_variations: batch-size: batch_size.2048 + gpu_memory.8,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size.4 + gpu_memory.16,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.4 @@ -1382,12 +1441,12 @@ variations: rtx_4090,sdxl,offline,run_harness: default_variations: batch-size: batch_size.2 - use_graphs: "True" + graphs: use-graphs rtx_4090,sdxl,server,run_harness: default_variations: batch-size: batch_size.2 - use_graphs: "True" + graphs: use-graphs rtx_4090,resnet50,offline,run_harness: default_variations: @@ -1396,6 +1455,7 @@ variations: rtx_4090,resnet50,server,run_harness: default_variations: batch-size: batch_size.32 + graphs: use-graphs rtx_4090,retinanet,offline,run_harness: default_variations: diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index f7c116b145..789ef6f9dc 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -138,10 +138,10 @@ def preprocess(i): elif env['CM_MODEL'] == "retinanet": #print(env) - dataset_path = env['CM_DATASET_PATH'] + dataset_path = env['CM_DATASET_OPENIMAGES_PATH'] #return {'return': 1, 'error': 'error'} - annotations_path = env['CM_DATASET_ANNOTATIONS_DIR_PATH'] + annotations_path = env['CM_DATASET_OPENIMAGES_ANNOTATIONS_DIR_PATH'] target_data_path_dir = os.path.join(env['MLPERF_SCRATCH_PATH'], 'data', 'open-images-v6-mlperf') if not os.path.exists(target_data_path_dir): cmds.append(f"mkdir -p {target_data_path_dir}") @@ -156,7 +156,7 @@ def preprocess(i): if not os.path.exists(target_data_path): cmds.append(f"ln -sf {dataset_path} {target_data_path}") - calibration_dataset_path=env['CM_CALIBRATION_DATASET_PATH'] + calibration_dataset_path=env['CM_OPENIMAGES_CALIBRATION_DATASET_PATH'] target_data_path_dir = os.path.join(env['MLPERF_SCRATCH_PATH'], 'data', 'open-images-v6-mlperf','calibration', 'train') if not os.path.exists(target_data_path_dir): cmds.append(f"mkdir -p {target_data_path_dir}") @@ -378,10 +378,14 @@ def preprocess(i): if audio_batch_size: run_config += f" --audio_batch_size={audio_batch_size}" - disable_encoder_plugin = env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN') - if disable_encoder_plugin and disable_encoder_plugin.lower() not in [ "no", "false" ]: + disable_encoder_plugin = str(env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN', '')) + if disable_encoder_plugin and disable_encoder_plugin.lower() not in [ "no", "false", "0", "" ]: run_config += " --disable_encoder_plugin" + disable_beta1_smallk = str(env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_BETA1_SMALLK', '')) + if disable_beta1_smallk and disable_beta1_smallk.lower() in [ "yes", "true", "1" ]: + run_config += " --disable_beta1_smallk" + workspace_size = env.get('CM_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE') if workspace_size: run_config += f" --workspace_size={workspace_size}" @@ -393,32 +397,32 @@ def preprocess(i): if log_dir: run_config += f" --log_dir={log_dir}" - use_graphs = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS') - if use_graphs and use_graphs.lower() not in [ "no", "false" ]: + use_graphs = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS', '')) + if use_graphs and use_graphs.lower() not in [ 
"no", "false", "0", "" ]: run_config += " --use_graphs" - use_deque_limit = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT') - if use_deque_limit and use_deque_limit.lower() not in [ "no", "false" ]: + use_deque_limit = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT')) + if use_deque_limit and use_deque_limit.lower() not in [ "no", "false", "0" ]: run_config += " --use_deque_limit" deque_timeout_usec = env.get('CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC') if deque_timeout_usec: run_config += f" --deque_timeout_usec={deque_timeout_usec}" - use_cuda_thread_per_device = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE') - if use_cuda_thread_per_device and use_cuda_thread_per_device.lower() not in [ "no", "false" ]: + use_cuda_thread_per_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE', '')) + if use_cuda_thread_per_device and use_cuda_thread_per_device.lower() not in [ "no", "false", "0", "" ]: run_config += " --use_cuda_thread_per_device" - run_infer_on_copy_streams = env.get('CM_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS') - if run_infer_on_copy_streams and run_infer_on_copy_streams.lower() not in [ "no", "false" ]: + run_infer_on_copy_streams = str(env.get('CM_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS', '')) + if run_infer_on_copy_streams and run_infer_on_copy_streams.lower() not in [ "no", "false", "0", "" ]: run_config += " --run_infer_on_copy_streams" - start_from_device = env.get('CM_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE') - if start_from_device and start_from_device.lower() not in [ "no", "false" ]: + start_from_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE', '')) + if start_from_device and start_from_device.lower() not in [ "no", "false", "0", "" ]: run_config += " --start_from_device" - end_on_device = env.get('CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE') - if end_on_device and end_on_device.lower() not in [ "no", "false" ]: + end_on_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE', '')) + if end_on_device and end_on_device.lower() not in [ "no", "false", "0", "" ]: run_config += " --end_on_device" max_dlas = env.get('CM_MLPERF_NVIDIA_HARNESS_MAX_DLAS') @@ -437,16 +441,16 @@ def preprocess(i): if soft_drop: run_config += f" --soft_drop={soft_drop}" - use_small_tile_gemm_plugin = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN') - if use_small_tile_gemm_plugin and use_small_tile_gemm_plugin.lower() not in [ "no", "false" ]: + use_small_tile_gemm_plugin = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN', '')) + if use_small_tile_gemm_plugin and use_small_tile_gemm_plugin.lower() not in [ "no", "false", "0", "" ]: run_config += f" --use_small_tile_gemm_plugin" audio_buffer_num_lines = env.get('CM_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES') if audio_buffer_num_lines: run_config += f" --audio_buffer_num_lines={audio_buffer_num_lines}" - use_fp8 = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8') - if use_fp8 and use_fp8.lower() not in [ "no", "false" ]: + use_fp8 = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8', '')) + if use_fp8 and use_fp8.lower() not in [ "no", "false", "0", "" ]: run_config += f" --use_fp8" if "llama2" in env["CM_MODEL"]: @@ -454,7 +458,7 @@ def preprocess(i): run_config += f" --tensor_parallelism={tmp_tp_size}" enable_sort = env.get('CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT') - if enable_sort and enable_sort.lower() not in [ "no", "false" ]: + if enable_sort and enable_sort.lower() not in [ "no", "false", "0" ]: run_config += f" --enable_sort" 
sdxl_server_batcher_time_limit = env.get('CM_MLPERF_NVIDIA_HARNESS_ENABLE_SORT') @@ -473,8 +477,8 @@ def preprocess(i): if num_warmups != '': run_config += f" --num_warmups={num_warmups}" - skip_postprocess = env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS') - if skip_postprocess and skip_postprocess.lower() not in [ "no", "false" ]: + skip_postprocess = str(env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS', '')) + if skip_postprocess and skip_postprocess.lower() not in [ "no", "false", "0", "" ]: run_config += f" --skip_postprocess" if test_mode: diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 2b8186c886..625ebefeee 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -27,6 +27,7 @@ default_env: CM_MLPERF_RUN_STYLE: test CM_TEST_QUERY_COUNT: '10' CM_MLPERF_QUANTIZATION: off + CM_GET_PLATFORM_DETAILS: yes env: CM_MLPERF_PRINT_SUMMARY: "no" @@ -63,6 +64,7 @@ input_mapping: gpu_name: CM_NVIDIA_GPU_NAME nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH tp_size: CM_NVIDIA_TP_SIZE + use_dataset_from_host: CM_USE_DATASET_FROM_HOST # Duplicate CM environment variables to the ones used in native apps env_key_mappings: @@ -101,7 +103,12 @@ deps: - tags: get,mlcommons,inference,src names: - inference-src - + - tags: pull,git,repo + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,mlperf,inference,utils - tags: install,pip-package,for-cmind-python,_package.pandas @@ -111,6 +118,15 @@ deps: posthook_deps: - tags: get,mlperf,sut,description #populate system meta information like framework + - tags: get,platform,details + enable_if_any_env: + CM_GET_PLATFORM_DETAILS: + - yes + skip_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + env: + CM_PLATFORM_DETAILS_FILE_PATH: '<<>>/system_info.txt' # Order of variations for documentation variation_groups_order: @@ -298,6 +314,7 @@ variations: nvidia-original,r4.1-dev_default: docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + image_name: mlperf-inference-nvidia-v4.1-dev-common nvidia-original,r4.1-dev_default,gptj_: docker: @@ -443,6 +460,11 @@ variations: cnndm-accuracy-script: tags: _int32 + amd,r4.1_default: + docker: + base_image: rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging + extra_run_args: ' --device=/dev/kfd --device=/dev/dri --device=/dev/mem' + amd: group: implementation @@ -585,10 +607,12 @@ variations: - mlperf-accuracy-script - imagenet-accuracy-script tags: run,accuracy,mlperf,_imagenet - docker: deps: - tags: get,dataset,imagenet,validation,original,_full + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' names: - imagenet-original - dataset-original @@ -614,6 +638,22 @@ variations: - openimages-accuracy-script tags: run,accuracy,mlperf,_openimages + retinanet,nvidia-original: + docker: + deps: + - names: + - openimages-original + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + tags: get,dataset,original,openimages,_validation,_full,_custom-annotations + - names: + - openimages-calibration + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + tags: get,dataset,original,openimages,_calibration + 3d-unet-99: group: model @@ -659,6 +699,7 @@ variations: 3d-unet_,reference: docker: + image_name: mlperf-inference-mlcommons-python-implementation-3d-unet deps: - enable_if_env: CM_MLPERF_DATASET_3DUNET_DOWNLOAD_TO_HOST: @@ -698,6 +739,7 @@ variations: sdxl,reference,float16: docker: + 
image_name: mlperf-inference-mlcommons-python-implementation-sdxl-float16 deps: - enable_if_env: CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST: @@ -706,6 +748,7 @@ variations: sdxl,reference,bfloat16: docker: + image_name: mlperf-inference-mlcommons-python-implementation-sdxl-bfloat16 deps: - enable_if_env: CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST: @@ -714,6 +757,7 @@ variations: sdxl,reference,float32: docker: + image_name: mlperf-inference-mlcommons-python-implementation-sdxl-float32 deps: - enable_if_env: CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST: @@ -765,11 +809,23 @@ variations: llama2-70b_,reference: docker: + image_name: mlperf-inference-mlcommons-python-implementation-llama2-70b deps: - enable_if_env: CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST: - 'yes' tags: get,ml-model,llama2 + + llama2-70b_,amd: + docker: + image_name: mlperf-inference-amd-python-implementation-llama2-70b + mounts: + - "${{ CM_LLAMA2_FINAL_SAFE_TENSORS_PATH }}:${{ CM_LLAMA2_FINAL_SAFE_TENSORS_PATH }" + deps: + - enable_if_env: + CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST: + - 'yes' + tags: get,ml-model,llama2,_amd,_pytorch mixtral-8x7b: group: @@ -799,6 +855,26 @@ variations: - openorca-gsm8k-mbxp-combined-accuracy-script tags: run,accuracy,mlperf,_openorca-gsm8k-mbxp,_int32 + mixtral-8x7b,reference: + docker: + deps: + - tags: get,ml-model,mixtral + names: + - ml-model + - mixtral-model + enable_if_env: + CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: + - 'yes' + - tags: get,dataset-mixtral,openorca-mbxp-gsm8k-combined + names: + - openorca-mbxp-gsm8k-combined-preprocessed + enable_if_env: + CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: + - 'yes' + mounts: + - "${{ MIXTRAL_CHECKPOINT_PATH }}:${{ MIXTRAL_CHECKPOINT_PATH }}" + - "${{ CM_DATASET_MIXTRAL_PREPROCESSED_PATH }}:${{ CM_DATASET_MIXTRAL_PREPROCESSED_PATH }}" + rnnt: group: model @@ -960,11 +1036,15 @@ variations: docker: deps: - tags: get,dlrm,data,mlperf,inference,_nvidia + mounts: + - "${{ DLRM_DATA_PATH }}:/home/mlperf_inf_dlrmv2" dlrm_,intel: docker: deps: - tags: get,preprocessed,dataset,criteo,_mlc + mounts: + - "${{ DLRM_DATA_PATH }}:${{ DLRM_DATA_PATH }}" dlrm_,reference: docker: @@ -972,7 +1052,8 @@ variations: - tags: get,preprocessed,dataset,criteo,_mlc - tags: get,ml-model,dlrm,_pytorch,_fp32 mounts: - - ${{ CM_ML_MODEL_FILE_WITH_PATH }}:${{ CM_ML_MODEL_FILE_WITH_PATH }} + - "${{ CM_ML_MODEL_FILE_WITH_PATH }}:${{ CM_ML_MODEL_FILE_WITH_PATH }}" + - "${{ DLRM_DATA_PATH }}:${{ DLRM_DATA_PATH }}" dockerfile_env: CM_ML_MODEL_FILE_WITH_PATH: "on" @@ -1455,10 +1536,10 @@ variations: add_deps_recursive: nvidia-inference-common-code: version: r4.0 - tags: _go + tags: _mlcommons nvidia-inference-server: version: r4.0 - tags: _go + tags: _mlcommons intel-harness: tags: _v4.0 default_env: @@ -1595,23 +1676,25 @@ docker: - cm pull repo mounts: - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}" + - "${{ CM_DATASET_OPENIMAGES_PATH }}:${{ CM_DATASET_OPENIMAGES_PATH }}" + - "${{ CM_OPENIMAGES_CALIBRATION_DATASET_PATH }}:${{ CM_OPENIMAGES_CALIBRATION_DATASET_PATH }}" + - "${{ CM_DATASET_OPENIMAGES_ANNOTATIONS_DIR_PATH }}:${{ CM_DATASET_OPENIMAGES_ANNOTATIONS_DIR_PATH }}" - "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}" - "${{ OUTPUT_BASE_DIR }}:${{ OUTPUT_BASE_DIR }}" - "${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}" - "${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}" - "${{ CM_CRITEO_PREPROCESSED_PATH }}:${{ CM_CRITEO_PREPROCESSED_PATH }}" - "${{ LLAMA2_CHECKPOINT_PATH 
}}:${{ LLAMA2_CHECKPOINT_PATH }}" - - "${{ DLRM_DATA_PATH }}:/home/mlperf_inf_dlrmv2" - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" skip_run_cmd: 'no' shm_size: '32gb' interactive: True - extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu cm_repo: mlcommons@cm4mlops - cm_repo_flags: ' --branch=mlperf-inference ' + cm_repo_branch: mlperf-inference real_run: False os_version: '22.04' docker_input_mapping: diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index c1fe196828..64cb42a8ed 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -122,6 +122,18 @@ def postprocess(i): pass # Not giving an error now. But accuracy paths need to be done for other benchmarks which may need the non-determinism test #return {'return': 1, 'error': f'Accuracy paths not done for model {model}'} scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + + if not state.get('cm-mlperf-inference-results'): + state['cm-mlperf-inference-results'] = {} + if not state.get('cm-mlperf-inference-results-last'): + state['cm-mlperf-inference-results-last'] = {} + if not state['cm-mlperf-inference-results'].get(state['CM_SUT_CONFIG_NAME']): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']].get(model): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model].get(scenario): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario] = {} + #if env.get("CM_MLPERF_FIND_PERFORMANCE_MODE", '') == "yes" and mode == "performance" and scenario != "Server": if mode == "performance" and scenario != "Server": @@ -230,7 +242,7 @@ def postprocess(i): if os.path.exists(env['CM_MLPERF_USER_CONF']): shutil.copy(env['CM_MLPERF_USER_CONF'], 'user.conf') - result, valid, power_result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode) + result, valid, power_result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode, env.get('CM_MLPERF_INFERENCE_SOURCE_VERSION')) power = None power_efficiency = None if power_result: @@ -239,17 +251,6 @@ def postprocess(i): power = power_result_split[0] power_efficiency = power_result_split[1] - if not state.get('cm-mlperf-inference-results'): - state['cm-mlperf-inference-results'] = {} - if not state.get('cm-mlperf-inference-results-last'): - state['cm-mlperf-inference-results-last'] = {} - if not state['cm-mlperf-inference-results'].get(state['CM_SUT_CONFIG_NAME']): - state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']] = {} - if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']].get(model): - state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model] = {} - if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model].get(scenario): - 
state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario] = {} - state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode] = result state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][mode+'_valid'] = valid.get(mode, False) diff --git a/script/authenticate-github-cli/customize.py b/script/authenticate-github-cli/customize.py index 14b762e0f2..a873791f43 100644 --- a/script/authenticate-github-cli/customize.py +++ b/script/authenticate-github-cli/customize.py @@ -13,7 +13,12 @@ def preprocess(i): cmd = "gh auth login" if env.get('CM_GH_AUTH_TOKEN', '') != '': - cmd = f" echo {env['CM_GH_AUTH_TOKEN']} | {cmd} --with-token" + if os_info['platform'] == 'windows': + with open("token", "w") as f: + f.write(env['CM_GH_AUTH_TOKEN']) + cmd = f"{cmd} --with-token < token" + else: + cmd = f" echo {env['CM_GH_AUTH_TOKEN']} | {cmd} --with-token" env['CM_RUN_CMD'] = cmd quiet = (env.get('CM_QUIET', False) == 'yes') diff --git a/script/authenticate-github-cli/run.bat b/script/authenticate-github-cli/run.bat index 648302ca71..2366ffc076 100644 --- a/script/authenticate-github-cli/run.bat +++ b/script/authenticate-github-cli/run.bat @@ -1 +1,19 @@ -rem native script +@echo off +echo Running gh auth: +REM Not printing CM_RUN_CMD as it can contain secret +REM echo %CM_RUN_CMD% +echo. + +REM Check if CM_FAKE_RUN is not equal to "yes" +if not "%CM_FAKE_RUN%"=="yes" ( + + REM Execute the command stored in CM_RUN_CMD + REM %CM_RUN_CMD% + echo %CM_GH_AUTH_TOKEN% | gh auth login --with-token + + REM Check the exit code and exit with error if non-zero + if %ERRORLEVEL% neq 0 ( + exit /b 1 + ) +) + diff --git a/script/authenticate-github-cli/run.sh b/script/authenticate-github-cli/run.sh index b17d52e4b3..58c52dad6b 100644 --- a/script/authenticate-github-cli/run.sh +++ b/script/authenticate-github-cli/run.sh @@ -7,8 +7,8 @@ #${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency -echo "Running: " -echo "${CM_RUN_CMD}" +echo "Running gh auth: " #Not printing as it can contain secret +#echo "${CM_RUN_CMD}" echo "" if [[ ${CM_FAKE_RUN} != "yes" ]]; then diff --git a/script/benchmark-program-mlperf/customize.py b/script/benchmark-program-mlperf/customize.py index a333b6c078..4ca4f70df5 100644 --- a/script/benchmark-program-mlperf/customize.py +++ b/script/benchmark-program-mlperf/customize.py @@ -16,13 +16,45 @@ def postprocess(i): if env.get('CM_MLPERF_POWER', '') == "yes": - if os_info['platform'] == 'windows': - return {'return':1, 'error':'TBD: this script is not yet supported on Windows'} - + if env.get('CM_MLPERF_SHORT_RANGING_RUN', '') != 'no': - os.system("echo '0' > "+env.get('CM_RUN_DIR','')+ "/count.txt") - env['CM_MLPERF_RUN_CMD'] = "CM_MLPERF_RUN_COUNT=\$(cat \${CM_RUN_DIR}/count.txt); echo \${CM_MLPERF_RUN_COUNT}; CM_MLPERF_RUN_COUNT=\$((CM_MLPERF_RUN_COUNT+1)); echo \${CM_MLPERF_RUN_COUNT} > \${CM_RUN_DIR}/count.txt && if [ \${CM_MLPERF_RUN_COUNT} -eq \'1\' ]; then export CM_MLPERF_USER_CONF=\${CM_MLPERF_RANGING_USER_CONF}; else export CM_MLPERF_USER_CONF=\${CM_MLPERF_TESTING_USER_CONF}; fi && "+env.get('CM_RUN_CMD','').strip() + # Write '0' to the count.txt file in CM_RUN_DIR + count_file = os.path.join(env.get('CM_RUN_DIR', ''), 'count.txt') + with open(count_file, 'w') as f: + f.write('0') + + if os_info['platform'] != 'windows': + # Construct the shell command with proper escaping + env['CM_MLPERF_RUN_CMD'] = r""" +CM_MLPERF_RUN_COUNT=\$(cat \${CM_RUN_DIR}/count.txt); 
+echo \${CM_MLPERF_RUN_COUNT}; +CM_MLPERF_RUN_COUNT=\$((CM_MLPERF_RUN_COUNT+1)); +echo \${CM_MLPERF_RUN_COUNT} > \${CM_RUN_DIR}/count.txt && +if [ \${CM_MLPERF_RUN_COUNT} -eq 1 ]; then +export CM_MLPERF_USER_CONF="${CM_MLPERF_RANGING_USER_CONF}"; +else +export CM_MLPERF_USER_CONF="${CM_MLPERF_TESTING_USER_CONF}"; +fi && + """ + env.get('CM_RUN_CMD', '').strip() + else: + env['CM_MLPERF_RUN_CMD'] = r""" +:: Read the current count from the file +set /p CM_MLPERF_RUN_COUNT=<%CM_RUN_DIR%\count.txt +echo !CM_MLPERF_RUN_COUNT! + +:: Increment the count +set /a CM_MLPERF_RUN_COUNT=!CM_MLPERF_RUN_COUNT! + 1 +echo !CM_MLPERF_RUN_COUNT! > %CM_RUN_DIR%\count.txt + +:: Check the value and set the environment variable accordingly +if !CM_MLPERF_RUN_COUNT! EQU 1 ( + set CM_MLPERF_USER_CONF=%CM_MLPERF_RANGING_USER_CONF% +) else ( + set CM_MLPERF_USER_CONF=%CM_MLPERF_TESTING_USER_CONF% +) + """ + env.get('CM_RUN_CMD', '').strip() else: - env['CM_MLPERF_RUN_CMD'] = env.get('CM_RUN_CMD','').strip() + # Just use the existing CM_RUN_CMD if no ranging run is needed + env['CM_MLPERF_RUN_CMD'] = env.get('CM_RUN_CMD', '').strip() return {'return':0} diff --git a/script/benchmark-program/customize.py b/script/benchmark-program/customize.py index af8c73a323..422d82c482 100644 --- a/script/benchmark-program/customize.py +++ b/script/benchmark-program/customize.py @@ -45,7 +45,7 @@ def preprocess(i): if os_info['platform'] != 'windows' and str(env.get('CM_SAVE_CONSOLE_LOG', True)).lower() not in [ "no", "false", "0"]: logs_dir = env.get('CM_LOGS_DIR', env['CM_RUN_DIR']) - env['CM_RUN_CMD'] += " 2>&1 ; echo \$? > exitstatus | tee " + q+ os.path.join(logs_dir, "console.out") + q + env['CM_RUN_CMD'] += r" 2>&1 ; echo \$? > exitstatus | tee " + q+ os.path.join(logs_dir, "console.out") + q # additional arguments and tags for measuring system informations(only if 'CM_PROFILE_NVIDIA_POWER' is 'on') if env.get('CM_PROFILE_NVIDIA_POWER', '') == "on": @@ -73,7 +73,7 @@ def preprocess(i): # running the script as a process in background pre_run_cmd = pre_run_cmd + 'cm run script --tags=runtime,system,utilisation' + env['CM_SYS_UTILISATION_SCRIPT_TAGS'] + ' --quiet & ' # obtain the command if of the background process - pre_run_cmd += ' cmd_pid=\$!' + ' && ' + 'echo CMD_PID=\$cmd_pid' + pre_run_cmd += r" cmd_pid=\$! && echo CMD_PID=\$cmd_pid" print(f"Pre run command for recording the runtime system information: {pre_run_cmd}") env['CM_PRE_RUN_CMD'] = pre_run_cmd @@ -81,7 +81,7 @@ def preprocess(i): # generate the post run cmd - for killing the process that records runtime system infos post_run_cmd = "" if env.get('CM_PROFILE_NVIDIA_POWER', '') == "on": - post_run_cmd += "echo killing process \$cmd_pid && kill -TERM \${cmd_pid}" + post_run_cmd += r"echo killing process \$cmd_pid && kill -TERM \${cmd_pid}" print(f"Post run command for killing the process that measures the runtime system information: {post_run_cmd}") env['CM_POST_RUN_CMD'] = post_run_cmd diff --git a/script/benchmark-program/run.sh b/script/benchmark-program/run.sh index cb4eb92046..6eb39d3337 100755 --- a/script/benchmark-program/run.sh +++ b/script/benchmark-program/run.sh @@ -57,19 +57,46 @@ fi echo $CM_PRE_RUN_CMD eval ${CM_PRE_RUN_CMD} -# Check CM_RUN_CMD0 -if [[ "${CM_RUN_CMD0}" != "" ]]; then - eval ${CM_RUN_CMD0} - exitstatus=$? -else - echo "${CM_RUN_CMD}" - eval ${CM_RUN_CMD} - exitstatus=$? +# Function to run command and check exit status +run_command() { + local cmd="$1" + + if [[ -n "$cmd" ]]; then + echo "$cmd" + eval "$cmd" + exitstatus=$? 
+ + # If 'exitstatus' file exists, overwrite the exit status with its content + if [[ -e exitstatus ]]; then + exitstatus=$(cat exitstatus) + fi + + # If exitstatus is non-zero, exit with that status + if [[ $exitstatus -ne 0 ]]; then + exit $exitstatus + fi + fi +} + +# Run CM_RUN_CMD0 if it exists, otherwise run CM_RUN_CMD +if [[ -n "$CM_RUN_CMD0" ]]; then + run_command "$CM_RUN_CMD0" fi -eval ${CM_POST_RUN_CMD} -test $? -eq 0 || exit $? +run_command "$CM_RUN_CMD" -test $exitstatus -eq 0 || $exitstatus +# Run post-run command if it exists +if [[ -n "$CM_POST_RUN_CMD" ]]; then + eval "$CM_POST_RUN_CMD" + post_exitstatus=$? + # Exit if post-run command fails + if [[ $post_exitstatus -ne 0 ]]; then + exit $post_exitstatus + fi +fi +# Final check for exitstatus and exit with the appropriate code +if [[ $exitstatus -ne 0 ]]; then + exit $exitstatus +fi diff --git a/script/build-docker-image/customize.py b/script/build-docker-image/customize.py index e66eddbd8b..a3a3bc8df7 100644 --- a/script/build-docker-image/customize.py +++ b/script/build-docker-image/customize.py @@ -54,7 +54,7 @@ def preprocess(i): image_name = get_image_name(env) if build_dockerfile: - dockerfile_path = "\${CM_DOCKERFILE_WITH_PATH}" + dockerfile_path = r"\${CM_DOCKERFILE_WITH_PATH}" # Write .dockerignore with open('.dockerignore', 'w') as f: diff --git a/script/build-dockerfile/_cm.yaml b/script/build-dockerfile/_cm.yaml index da00f24de8..7706597ee5 100644 --- a/script/build-dockerfile/_cm.yaml +++ b/script/build-dockerfile/_cm.yaml @@ -19,6 +19,7 @@ default_env: ' CM_DOCKER_OS: ubuntu CM_DOCKER_NOT_PULL_UPDATE: False + CM_MLOPS_REPO_BRANCH: mlperf-inference input_mapping: build: CM_BUILD_DOCKER_IMAGE @@ -26,6 +27,7 @@ input_mapping: cm_repo: CM_MLOPS_REPO cm_repo_flags: CM_DOCKER_ADD_FLAG_TO_CM_MLOPS_REPO cm_repos: CM_DOCKER_EXTRA_CM_REPOS + cm_repo_branch: CM_MLOPS_REPO_BRANCH comments: CM_DOCKER_RUN_COMMENTS copy_files: CM_DOCKER_COPY_FILES docker_base_image: CM_DOCKER_IMAGE_BASE diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 896454fb93..de96626070 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -3,6 +3,7 @@ import os import json import re +import shutil def preprocess(i): @@ -10,7 +11,7 @@ def preprocess(i): env = i['env'] if env["CM_DOCKER_OS"] not in [ "ubuntu", "rhel", "arch" ]: - return {'return': 1, 'error': "Currently only ubuntu, rhel and arch are supported in CM docker"} + return {'return': 1, 'error': f"Specified docker OS: {env['CM_DOCKER_OS']}. 
Currently only ubuntu, rhel and arch are supported in CM docker"} path = i['run_script_input']['path'] @@ -53,27 +54,67 @@ def preprocess(i): if not docker_image_base: return {'return': 1, 'error': f"Version \"{env['CM_DOCKER_OS_VERSION']}\" is not supported yet for \"{env['CM_DOCKER_OS']}\" "} - if env.get("CM_MLOPS_REPO", "") != "": - cm_mlops_repo = env["CM_MLOPS_REPO"] - # the below pattern matches both the HTTPS and SSH git link formats - git_link_pattern = r'^(https?://github\.com/([^/]+)/([^/]+)\.git|git@github\.com:([^/]+)/([^/]+)\.git)$' - if match := re.match(git_link_pattern, cm_mlops_repo): - if match.group(2) and match.group(3): - repo_owner = match.group(2) - repo_name = match.group(3) - elif match.group(4) and match.group(5): - repo_owner = match.group(4) - repo_name = match.group(5) - cm_mlops_repo = f"{repo_owner}@{repo_name}" - print(f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}") - else: - cm_mlops_repo = "mlcommons@cm4mlops" + # Handle cm_mlops Repository + if env.get("CM_REPO_PATH", "") != "": + use_copy_repo = True + cm_repo_path = os.path.abspath(env["CM_REPO_PATH"]) + + if not os.path.exists(cm_repo_path): + return {'return': 1, 'error': f"Specified CM_REPO_PATH does not exist: {cm_repo_path}"} + + cmr_yml_path = os.path.join(cm_repo_path, "cmr.yaml") + if not os.path.isfile(cmr_yml_path): + return {'return': 1, 'error': f"cmr.yaml not found in CM_REPO_PATH: {cm_repo_path}"} + + # Define the build context directory (where the Dockerfile will be) + build_context_dir = os.path.dirname(env.get('CM_DOCKERFILE_WITH_PATH', os.path.join(os.getcwd(), "Dockerfile"))) + os.makedirs(build_context_dir, exist_ok=True) + + # Create cm_repo directory relative to the build context + repo_build_context_path = os.path.join(build_context_dir, "cm_repo") + + # Remove existing directory if it exists + if os.path.exists(repo_build_context_path): + shutil.rmtree(repo_build_context_path) + + try: + print(f"Copying repository from {cm_repo_path} to {repo_build_context_path}") + shutil.copytree(cm_repo_path, repo_build_context_path) + except Exception as e: + return {'return': 1, 'error': f"Failed to copy repository to build context: {str(e)}"} + + if not os.path.isdir(repo_build_context_path): + return {'return': 1, 'error': f"Repository was not successfully copied to {repo_build_context_path}"} + + # (Optional) Verify the copy + if not os.path.isdir(repo_build_context_path): + return {'return': 1, 'error': f"cm_repo was not successfully copied to the build context at {repo_build_context_path}"} + else: + print(f"cm_repo is present in the build context at {repo_build_context_path}") - if env.get("CM_MLOPS_REPO_BRANCH", '') != '': - cm_mlops_repo_branch_string = f" --branch {env['CM_MLOPS_REPO_BRANCH']}" + relative_repo_path = os.path.relpath(repo_build_context_path, build_context_dir) else: - cm_mlops_repo_branch_string = "" + # CM_REPO_PATH is not set; use cm pull repo as before + use_copy_repo = False + + if env.get("CM_MLOPS_REPO", "") != "": + cm_mlops_repo = env["CM_MLOPS_REPO"] + # the below pattern matches both the HTTPS and SSH git link formats + git_link_pattern = r'^(https?://github\.com/([^/]+)/([^/]+)(?:\.git)?|git@github\.com:([^/]+)/([^/]+)(?:\.git)?)$' + if match := re.match(git_link_pattern, cm_mlops_repo): + if match.group(2) and match.group(3): + repo_owner = match.group(2) + repo_name = match.group(3) + elif match.group(4) and match.group(5): + repo_owner = match.group(4) + repo_name = match.group(5) + cm_mlops_repo = f"{repo_owner}@{repo_name}" + 
print(f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}") + else: + cm_mlops_repo = "mlcommons@cm4mlops" + cm_mlops_repo_branch_string = f" --branch={env['CM_MLOPS_REPO_BRANCH']}" + if env.get('CM_DOCKERFILE_WITH_PATH', '') == '': env['CM_DOCKERFILE_WITH_PATH'] = os.path.join(os.getcwd(), "Dockerfile") @@ -128,7 +169,6 @@ def preprocess(i): f.write(EOL) copy_cmds = [] if 'CM_DOCKER_COPY_FILES' in env: - import shutil for copy_file in env['CM_DOCKER_COPY_FILES']: copy_split = copy_file.split(":") if len(copy_split) != 2: @@ -202,11 +242,21 @@ def preprocess(i): f.write(EOL+'# Download CM repo for scripts' + EOL) - # Add possibility to force rebuild with some extra flag for the repository - x = env.get('CM_DOCKER_ADD_FLAG_TO_CM_MLOPS_REPO','') - if x!='': x=' '+x - - f.write('RUN cm pull repo ' + cm_mlops_repo + x + EOL) + if use_copy_repo: + docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@cm4mlops" + f.write(f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' + EOL) + + f.write(EOL + '# Register CM repository' + EOL) + f.write('RUN cm pull repo --url={} --quiet'.format(docker_repo_dest) + EOL) + f.write(EOL) + + + else: + # Use cm pull repo as before + x = env.get('CM_DOCKER_ADD_FLAG_TO_CM_MLOPS_REPO','') + if x!='': x=' '+x + + f.write('RUN cm pull repo ' + cm_mlops_repo + cm_mlops_repo_branch_string + x + EOL) # Check extra repositories x = env.get('CM_DOCKER_EXTRA_CM_REPOS','') diff --git a/script/build-dockerfile/dockerinfo.json b/script/build-dockerfile/dockerinfo.json index adc7cf269b..df9c6c90a7 100644 --- a/script/build-dockerfile/dockerinfo.json +++ b/script/build-dockerfile/dockerinfo.json @@ -1,6 +1,6 @@ { "python-packages": [ - "cmind", "requests", "giturlparse", "tabulate" + "wheel", "cmind", "requests", "giturlparse", "tabulate" ], "ARGS": [ "CM_GH_TOKEN" diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml index 74ce30e2f1..c5003f67cc 100644 --- a/script/build-mlperf-inference-server-nvidia/_cm.yaml +++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml @@ -199,10 +199,33 @@ variations: names: - pytorch - torch + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: get,generic-python-lib,_whl-url.https://github.com/mlcommons/cm4mlperf-inference/releases/download/mlperf-inference-v4.0/torch-2.1.0a0+git32f93b1-cp38-cp38-linux_x86_64.whl + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 names: - pytorchvision - torchvision + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: get,generic-python-lib,_whl-url.https://github.com/mlcommons/cm4mlperf-inference/releases/download/mlperf-inference-v4.0/torchvision-0.16.0a0+657027f-cp38-cp38-linux_x86_64.whl + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 versions: r2.1: @@ -250,10 +273,32 @@ versions: names: - pytorch - torch + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: get,generic-python-lib,_package.torch,_whl-url.https://github.com/mlcommons/cm4mlperf-inference/releases/download/mlperf-inference-v4.0/torch-2.1.0a0+git32f93b1-cp38-cp38-linux_x86_64.whl + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 names: - pytorchvision - torchvision + 
skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: get,generic-python-lib,_package.torchvision,_whl-url.https://github.com/mlcommons/cm4mlperf-inference/releases/download/mlperf-inference-v4.0/torchvision-0.16.0a0+657027f-cp38-cp38-linux_x86_64.whl + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 r4.1-dev: add_deps_recursive: @@ -270,10 +315,32 @@ versions: names: - pytorch - torch + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: get,generic-python-lib,_package.torch,_whl-url.https://github.com/mlcommons/cm4mlperf-inference/releases/download/mlperf-inference-v4.0/torch-2.1.0a0+git32f93b1-cp38-cp38-linux_x86_64.whl + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 names: - pytorchvision - torchvision + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 + - tags: get,generic-python-lib,_package.torchvision,_whl-url.https://github.com/mlcommons/cm4mlperf-inference/releases/download/mlperf-inference-v4.0/torchvision-0.16.0a0+657027f-cp38-cp38-linux_x86_64.whl + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - x86_64 + CM_PYTHON_MINOR_VERSION: + - 8 r4.1: add_deps_recursive: diff --git a/script/clean-nvidia-mlperf-inference-scratch-space/customize.py b/script/clean-nvidia-mlperf-inference-scratch-space/customize.py index 5a0a95e765..977a9993bb 100644 --- a/script/clean-nvidia-mlperf-inference-scratch-space/customize.py +++ b/script/clean-nvidia-mlperf-inference-scratch-space/customize.py @@ -25,6 +25,9 @@ def preprocess(i): if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'preprocessed_data': clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "preprocessed_data", "coco2014-tokenized-sdxl")} """ cache_rm_tags = "nvidia-harness,_preprocess_data,_sdxl" + if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'downloaded_model': + clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "models", "SDXL")} """ + cache_rm_tags = "nvidia-harness,_download_model,_sdxl" cache_rm_tags = cache_rm_tags + extra_cache_rm_tags diff --git a/script/detect-os/_cm.json b/script/detect-os/_cm.json index 383b7f9302..7d64c9397b 100644 --- a/script/detect-os/_cm.json +++ b/script/detect-os/_cm.json @@ -25,6 +25,11 @@ "windows" ] }, + "skip_if_env": { + "CM_WINDOWS_SYS_UTILS_MIN_INSTALL": [ + "yes" + ] + }, "tags": "get,sys-utils-min" } ], diff --git a/script/detect-os/customize.py b/script/detect-os/customize.py index 2f8dd4c76d..d59a511f6a 100644 --- a/script/detect-os/customize.py +++ b/script/detect-os/customize.py @@ -55,6 +55,9 @@ def postprocess(i): env['CM_HOST_OS_MACHINE'] = state['os_uname_machine'] + else: + env['CM_HOST_OS_PACKAGE_MANAGER'] = "choco" + import platform env['CM_HOST_SYSTEM_NAME'] = platform.node() @@ -93,6 +96,9 @@ def postprocess(i): elif env.get('CM_HOST_OS_PACKAGE_MANAGER', '') == "zypper": env['CM_HOST_OS_PACKAGE_MANAGER_INSTALL_CMD'] = "zypper install -y" env['CM_HOST_OS_PACKAGE_MANAGER_UPDATE_CMD'] = "zypper update -y" + elif env.get('CM_HOST_OS_PACKAGE_MANAGER', '') == "choco": + env['CM_HOST_OS_PACKAGE_MANAGER_INSTALL_CMD'] = "choco install -y" + env['CM_HOST_OS_PACKAGE_MANAGER_UPDATE_CMD'] = "choco upgrade -y" if os.path.exists("/.dockerenv"): env['CM_RUN_INSIDE_DOCKER'] = "yes" diff --git a/script/detect-sudo/_cm.yaml b/script/detect-sudo/_cm.yaml index 56c83b8304..64b60a5f6c 100644 --- 
a/script/detect-sudo/_cm.yaml +++ b/script/detect-sudo/_cm.yaml @@ -9,7 +9,7 @@ cache: false category: DevOps automation new_env_keys: - - CM_SUDO_* + - CM_SUDO* tags: - detect diff --git a/script/detect-sudo/customize.py b/script/detect-sudo/customize.py index dbc9b89705..31c72ba9d6 100644 --- a/script/detect-sudo/customize.py +++ b/script/detect-sudo/customize.py @@ -20,6 +20,8 @@ def preprocess(i): env['CM_SUDO_USER'] = "yes" if os.geteuid() == 0: env['CM_SUDO'] = '' #root user does not need sudo + else: + env['CM_SUDO'] = 'sudo' else: if can_execute_sudo_without_password(): env['CM_SUDO_USER'] = "yes" diff --git a/script/generate-mlperf-inference-submission/_cm.json b/script/generate-mlperf-inference-submission/_cm.json index 3827af24a4..2a51f764b3 100644 --- a/script/generate-mlperf-inference-submission/_cm.json +++ b/script/generate-mlperf-inference-submission/_cm.json @@ -53,6 +53,7 @@ "run_style": "CM_MLPERF_RUN_STYLE", "skip_truncation": "CM_SKIP_TRUNCATE_ACCURACY", "submission_dir": "CM_MLPERF_INFERENCE_SUBMISSION_DIR", + "submission_base_dir": "CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR", "clean": "CM_MLPERF_CLEAN_SUBMISSION_DIR", "hw_name": "CM_HW_NAME", "sw_notes_extra": "CM_MLPERF_SUT_SW_NOTES_EXTRA", @@ -120,5 +121,46 @@ "mlperf-inference-submission", "mlcommons-inference-submission" ], - "uid": "5f8ab2d0b5874d53" + "uid": "5f8ab2d0b5874d53", + "docker": { + "use_host_group_id": true, + "use_host_user_id": true, + "real_run": false, + "deps": [ + { + "tags": "get,mlperf,inference,results,dir,local", + "names": "get-mlperf-inference-results-dir", + "skip_if_env": { + "CM_MLPERF_INFERENCE_RESULTS_DIR_": [ + "on" + ] + } + }, + { + "tags": "get,mlperf,inference,submission,dir,local", + "names": "get-mlperf-inference-submission-dir", + "skip_if_any_env": { + "CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR": [ + "on" + ] + } + } + ], + "pre_run_cmds": [ + "cm pull repo" + ], + "mounts": [ + "${{ CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR }}", + "${{ CM_MLPERF_INFERENCE_RESULTS_DIR_ }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR_ }}" + ], + "extra_run_args": " --cap-add SYS_ADMIN", + "os": "ubuntu", + "cm_repo": "mlcommons@cm4mlops", + "cm_repo_branch": "mlperf-inference", + "os_version": "22.04", + "docker_input_mapping": { + "submission_base_dir": "CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR", + "results_dir": "CM_MLPERF_INFERENCE_RESULTS_DIR_" + } + } } diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index b1b77ed4ba..3c03a2dda0 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -15,7 +15,7 @@ def fill_from_json(file_path, keys, sut_info): with open(file_path, 'r') as f: data = json.load(f) for key in keys: - if key in data and sut_info[key] is None: + if key in data and (sut_info[key] is None or sut_info[key] == "default"): sut_info[key] = data[key] elif key in data and sut_info[key] != data[key]: return -1 # error saying there is a mismatch in the value of a key @@ -28,6 +28,20 @@ def check_dict_filled(keys, sut_info): return False return True +# The function checks whether the submitting model name belongs standard model names for MLPef Inference +def model_in_valid_models(model, mlperf_version): + import submission_checker as checker + config = checker.MODEL_CONFIG + + if model not in config[mlperf_version]['models']: + internal_model_name = 
config[mlperf_version]["model_mapping"].get(model, '') # resnet50 -> resnet + if internal_model_name == '': + return (False, None) # Indicate failure with no internal model name + else: + return (True, internal_model_name) # Indicate success with internal model name + else: + return (True, model) + def generate_submission(i): # Save current user directory @@ -51,12 +65,19 @@ def generate_submission(i): env['CM_MLPERF_INFERENCE_SUBMISSION_DIR'] = os.path.join(user_home, "mlperf_submission") submission_dir = env.get('CM_MLPERF_INFERENCE_SUBMISSION_DIR', '') + if submission_dir == '': + submission_base_dir = env.get('CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR', '') + if submission_base_dir == '': + return {'return':1, 'error':f"Both CM_MLPERF_INFERENCE_SUBMISSION_DIR and CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR can not be empty!"} + else: + submission_dir = os.path.join(submission_base_dir, "mlperf_inference_submission") + env['CM_MLPERF_INFERENCE_SUBMISSION_DIR'] = submission_dir if env.get('CM_MLPERF_CLEAN_SUBMISSION_DIR','')!='': print ('=================================================') print ('Cleaning {} ...'.format(env['CM_MLPERF_INFERENCE_SUBMISSION_DIR'])) - if os.path.exists(env['CM_MLPERF_INFERENCE_SUBMISSION_DIR']): - shutil.rmtree(env['CM_MLPERF_INFERENCE_SUBMISSION_DIR']) + if os.path.exists(submission_dir): + shutil.rmtree(submission_dir) print ('=================================================') if not os.path.isdir(submission_dir): @@ -131,11 +152,12 @@ def generate_submission(i): saved_system_meta_file_path = os.path.join(result_path, 'system_meta.json') # checks for json file containing system meta sut_info = { - "system_name": None, + "hardware_name": None, "implementation": None, "device": None, "framework": None, - "run_config": None + "framework_version": "default", + "run_config": "default" } # variable to store the system meta model_mapping_combined = {} # to store all the model mapping related to an SUT @@ -163,32 +185,39 @@ def generate_submission(i): # Even the model mapping json file is present in root directory, the folders are traversed # and the data is updated provided not duplicated. 
models = [f for f in os.listdir(result_path) if not os.path.isfile(os.path.join(result_path, f))] - for model in models: - result_model_path = os.path.join(result_path, model) - scenarios = [f for f in os.listdir(result_model_path) if not os.path.isfile(os.path.join(result_model_path, f))] - for scenario in scenarios: - result_scenario_path = os.path.join(result_model_path, scenario) - modes = [f for f in os.listdir(result_scenario_path) if not os.path.isfile(os.path.join(result_scenario_path, f))] - for mode in modes: - result_mode_path = os.path.join(result_scenario_path,mode) - if mode == "performance": - compliance_performance_run_path = os.path.join(result_mode_path, "run_1") - # model mapping part - tmp_model_mapping_file_path = os.path.join(compliance_performance_run_path, "model_mapping.json") - if os.path.exists(tmp_model_mapping_file_path): - with open(tmp_model_mapping_file_path, 'r') as f: - new_model_mapping = json.load(f) - for new_custom_model in new_model_mapping: - if new_custom_model not in model_mapping_combined: - model_mapping_combined.update({new_custom_model:new_model_mapping[new_custom_model]}) - else: - return {"return":1, "error":f"model_mapping.json not found in {compliance_performance_run_path}"} - + if division == "open" and len(model_mapping_combined) == 0: + for model in models: + is_valid, returned_model_name = model_in_valid_models(model, env.get('CM_MLPERF_LAST_RELEASE', 'v4.1')) + if not is_valid: + result_model_path = os.path.join(result_path, model) + scenarios = [f for f in os.listdir(result_model_path) if not os.path.isfile(os.path.join(result_model_path, f))] + for scenario in scenarios: + result_scenario_path = os.path.join(result_model_path, scenario) + modes = [f for f in os.listdir(result_scenario_path) if not os.path.isfile(os.path.join(result_scenario_path, f))] + for mode in modes: + result_mode_path = os.path.join(result_scenario_path,mode) + if mode == "performance": + compliance_performance_run_path = os.path.join(result_mode_path, "run_1") + # model mapping part + tmp_model_mapping_file_path = os.path.join(compliance_performance_run_path, "model_mapping.json") + if os.path.exists(tmp_model_mapping_file_path): + with open(tmp_model_mapping_file_path, 'r') as f: + new_model_mapping = json.load(f) + for new_custom_model in new_model_mapping: + if new_custom_model not in model_mapping_combined: + model_mapping_combined.update({new_custom_model:new_model_mapping[new_custom_model]}) + else: + return {"return":1, "error":f"model_mapping.json not found in {compliance_performance_run_path}"} + else: + if returned_model_name != model: + model_mapping_combined.update({model:returned_model_name}) + if check_dict_filled(sut_info.keys(), sut_info): - system = sut_info["system_name"] + system = sut_info["hardware_name"] implementation = sut_info["implementation"] device = sut_info["device"] framework = sut_info["framework"].replace(" ","_") + framework_version = sut_info["framework_version"] run_config = sut_info["run_config"] new_res = f"{system}-{implementation}-{device}-{framework}-{run_config}" else: @@ -214,7 +243,7 @@ def generate_submission(i): system_meta_default['framework'] = framework + " " + framework_version else: print(parts) - return {'return': 1, 'error': f"The required details for generating the inference submission:\n1.system_name\n2.implementation\n3.framework\n4.run_config\nInclude a cm-sut-info.json file with the above content in {result_path}"} + return {'return': 1, 'error': f"The required details for generating the inference 
submission:\n1.hardware_name\n2.implementation\n3.Device\n4.framework\n5.framework_version\n6.run_config\nInclude a cm-sut-info.json or sut-info.json file with the above content in {result_path}"} platform_prefix = inp.get('platform_prefix', '') if platform_prefix: @@ -239,8 +268,11 @@ def generate_submission(i): results = {} + model_platform_info_file = None + for model in models: results[model] = {} + platform_info_file = None result_model_path = os.path.join(result_path, model) submission_model_path = os.path.join(submission_path, model) measurement_model_path = os.path.join(measurement_path, model) @@ -356,8 +388,10 @@ def generate_submission(i): with open(measurements_json_path, "r") as f: measurements_json = json.load(f) model_precision = measurements_json.get("weight_data_types", "fp32") - if os.path.exists(user_conf_path): + if os.path.exists(measurements_json_path): + # This line can be removed once the PR in the inference repo is merged. shutil.copy(measurements_json_path, os.path.join(submission_measurement_path, sub_res+'.json')) + shutil.copy(measurements_json_path, os.path.join(submission_measurement_path, 'model-info.json')) files = [] readme = False @@ -386,8 +420,10 @@ def generate_submission(i): files.append(f) elif f == "spl.txt": files.append(f) - elif f in [ "README.md", "README-extra.md", "cm-version-info.json", "os_info.json", "cpu_info.json", "pip_freeze.json" ] and mode == "performance": + elif f in [ "README.md", "README-extra.md", "cm-version-info.json", "os_info.json", "cpu_info.json", "pip_freeze.json", "system_info.txt" ] and mode == "performance": shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, f)) + if f == "system_info.txt" and not platform_info_file: + platform_info_file = os.path.join(result_mode_path, f) elif f in [ "console.out" ]: shutil.copy(os.path.join(result_mode_path, f), os.path.join(submission_measurement_path, mode+"_"+f)) @@ -410,13 +446,33 @@ def generate_submission(i): f.write("TBD") #create an empty README else: readme_suffix = "" - result_string, result = mlperf_utils.get_result_string(env['CM_MLPERF_LAST_RELEASE'], model, scenario, result_scenario_path, power_run, sub_res, division, system_file, model_precision) + result_string, result = mlperf_utils.get_result_string(env['CM_MLPERF_LAST_RELEASE'], model, scenario, result_scenario_path, power_run, sub_res, division, system_file, model_precision, env.get('CM_MLPERF_INFERENCE_SOURCE_VERSION')) for key in result: results[model][scenario][key] = result[key] with open(readme_file, mode='a') as f: f.write(result_string) + #Copy system_info.txt to the submission measurements model folder if any scenario performance run has it + sys_info_file = None + if os.path.exists(os.path.join(result_model_path, "system_info.txt")): + sys_info_file = os.path.join(result_model_path, "system_info.txt") + elif platform_info_file: + sys_info_file = platform_info_file + if sys_info_file: + model_platform_info_file = sys_info_file + shutil.copy(sys_info_file, os.path.join(measurement_model_path, "system_info.txt")) + + #Copy system_info.txt to the submission measurements folder if any model performance run has it + sys_info_file = None + if os.path.exists(os.path.join(result_path, "system_info.txt")): + sys_info_file = os.path.join(result_path, "system_info.txt") + elif model_platform_info_file: + sys_info_file = model_platform_info_file + if sys_info_file: + shutil.copy(sys_info_file, os.path.join(measurement_path, "system_info.txt")) + + with open(system_file, "w") as fp: 
json.dump(system_meta, fp, indent=2) diff --git a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py index e2569db27f..9acc4da978 100644 --- a/script/generate-mlperf-inference-user-conf/customize.py +++ b/script/generate-mlperf-inference-user-conf/customize.py @@ -346,7 +346,7 @@ def preprocess(i): env['CM_MLPERF_RANGING_USER_CONF'] = os.path.join(os.path.dirname(user_conf_path), "ranging_"+key+".conf")# ranging_user_conf_path for a shorter run if short_ranging: - env['CM_MLPERF_USER_CONF'] = "\${CM_MLPERF_USER_CONF}" + env['CM_MLPERF_USER_CONF'] = r"\${CM_MLPERF_USER_CONF}" else: env['CM_MLPERF_USER_CONF'] = os.path.join(os.path.dirname(user_conf_path), key+".conf")# user_conf_path else: diff --git a/script/get-cuda-devices/_cm.yaml b/script/get-cuda-devices/_cm.yaml index e0d348b831..2cd9073d7b 100644 --- a/script/get-cuda-devices/_cm.yaml +++ b/script/get-cuda-devices/_cm.yaml @@ -31,6 +31,9 @@ docker: use_host_group_id: 'yes' image_tag_extra: '-cm-dev' +env: + CM_DETECT_USING_PYCUDA: 'no' + new_env_keys: - CM_CUDA_DEVICE_* - CM_CUDA_NUM_DEVICES @@ -46,7 +49,7 @@ print_files_if_script_error: variations: with-pycuda: env: - CM_CUDA_DEVICES_DETECT_USING_PYCUDA: 'yes' + CM_DETECT_USING_PYCUDA: 'yes' deps: - tags: get,python3 names: diff --git a/script/get-cuda-devices/customize.py b/script/get-cuda-devices/customize.py index 4aaf215484..242044e7e8 100644 --- a/script/get-cuda-devices/customize.py +++ b/script/get-cuda-devices/customize.py @@ -6,7 +6,7 @@ def preprocess(i): env = i['env'] - if str(env.get('CM_CUDA_DEVICES_DETECT_USING_PYCUDA', '')).lower() in [ "1", "yes", "true"]: + if str(env.get('CM_DETECT_USING_PYCUDA', '')).lower() in [ "1", "yes", "true"]: i['run_script_input']['script_name'] = 'detect' return {'return':0} @@ -53,8 +53,8 @@ def postprocess(i): key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_') env[key_env] = val - state['cm_cuda_num_devices'] = gpu_id - env['CM_CUDA_NUM_DEVICES'] = gpu_id + state['cm_cuda_num_devices'] = gpu_id + 1 + env['CM_CUDA_NUM_DEVICES'] = gpu_id + 1 state['cm_cuda_device_prop'] = p state['cm_cuda_devices_prop'] = gpu diff --git a/script/get-cuda/_cm.yaml b/script/get-cuda/_cm.yaml index d68e3fd7f8..db5a30b0bb 100644 --- a/script/get-cuda/_cm.yaml +++ b/script/get-cuda/_cm.yaml @@ -46,6 +46,7 @@ new_env_keys: - CUDA_PATH - CM_CUDA_* - CM_NVCC_* +- CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX5 - +PATH - +C_INCLUDE_PATH - +CPLUS_INCLUDE_PATH diff --git a/script/get-cuda/customize.py b/script/get-cuda/customize.py index 03e6c7bf79..11de3c6cd7 100644 --- a/script/get-cuda/customize.py +++ b/script/get-cuda/customize.py @@ -214,5 +214,6 @@ def postprocess(i): env['+ LDFLAGS'].append("-L"+x) env['CM_CUDA_VERSION_STRING'] = "cu"+env['CM_CUDA_VERSION'].replace(".", "") + env['CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX5'] = env['CM_CUDA_VERSION_STRING'] return {'return':0, 'version': version} diff --git a/script/get-dataset-imagenet-aux/_cm.json b/script/get-dataset-imagenet-aux/_cm.json index 97e10571cd..7093e8b2b6 100644 --- a/script/get-dataset-imagenet-aux/_cm.json +++ b/script/get-dataset-imagenet-aux/_cm.json @@ -39,6 +39,7 @@ }, "from.berkeleyvision": { "group": "download-source", + "default": true, "base": [ "2012" ], @@ -51,7 +52,6 @@ }, "from.dropbox": { "group": "download-source", - "default": true, "base": [ "2012" ], diff --git a/script/get-dataset-imagenet-val/_cm.yaml b/script/get-dataset-imagenet-val/_cm.yaml index 0b08586157..0b9923927b 100644 --- 
a/script/get-dataset-imagenet-val/_cm.yaml +++ b/script/get-dataset-imagenet-val/_cm.yaml @@ -94,7 +94,7 @@ variations: env: CM_DAE_FILENAME: ILSVRC2012_img_val_500.tar CM_DAE_URL: http://cKnowledge.org/ai/data/ILSVRC2012_img_val_500.tar -# CM_DAE_URL: https://www.dropbox.com/scl/fi/87mnvkd1la0taht40xxgn/ILSVRC2012_img_val_500.tar?rlkey=9t9spqtx3imsa32ga5zugom50&st=vl85iuat&dl=1 + CM_DOWNLOAD_URL1: https://www.dropbox.com/scl/fi/a7fhjnzxi6x3ceapxh5bm/ILSVRC2012_img_val_500.tar?rlkey=hz4rabo9ve43co3c303y9r6l7&st=ulcgb3av&dl=1 CM_DATASET_SIZE: '500' CM_DOWNLOAD_CHECKSUM: 8627befdd8c2bcf305729020e9db354e CM_DOWNLOAD_FILENAME: ILSVRC2012_img_val_500.tar diff --git a/script/get-dataset-mixtral/customize.py b/script/get-dataset-mixtral/customize.py new file mode 100644 index 0000000000..31d72127ea --- /dev/null +++ b/script/get-dataset-mixtral/customize.py @@ -0,0 +1,18 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + return {'return':0} + + +def postprocess(i): + env = i['env'] + + env['CM_DATASET_MIXTRAL_PREPROCESSED_PATH'] = env['CM_DATASET_PREPROCESSED_PATH'] + + return {'return':0} diff --git a/script/get-dataset-openimages/_cm.json b/script/get-dataset-openimages/_cm.json index 3f3904076b..b93cf8aa0f 100644 --- a/script/get-dataset-openimages/_cm.json +++ b/script/get-dataset-openimages/_cm.json @@ -69,6 +69,7 @@ "new_env_keys": [ "CM_DATASET_PATH", "CM_DATASET_PATH_ROOT", + "CM_DATASET_OPENIMAGES_PATH", "CM_DATASET_OPENIMAGES_DATASET_PATH", "CM_DATASET_OPENIMAGES_DATASET_PATH_ROOT", "CM_DATASET_ANNOTATIONS_DIR_PATH", @@ -76,7 +77,10 @@ "CM_DATASET_CALIBRATION_ANNOTATIONS_FILE_PATH", "CM_DATASET_VALIDATION_ANNOTATIONS_FILE_PATH", "CM_CALIBRATION_DATASET_PATH", - "CM_CALIBRATION_DATASET_PATH_ROOT" + "CM_CALIBRATION_DATASET_PATH_ROOT", + "CM_OPENIMAGES_CALIBRATION_DATASET_PATH", + "CM_DATASET_OPENIMAGES_ANNOTATIONS_DIR_PATH", + "CM_DATASET_OPENIMAGES_VALIDATION_ANNOTATIONS_FILE_PATH" ], "tags": [ "get", diff --git a/script/get-dataset-openimages/customize.py b/script/get-dataset-openimages/customize.py index 6ccb558c59..84d1f52312 100644 --- a/script/get-dataset-openimages/customize.py +++ b/script/get-dataset-openimages/customize.py @@ -70,6 +70,7 @@ def postprocess(i): annotations_file_path = os.path.join(env['CM_DATASET_ANNOTATIONS_DIR_PATH'], "openimages-mlperf.json") env['CM_DATASET_VALIDATION_ANNOTATIONS_FILE_PATH'] = annotations_file_path env['CM_DATASET_ANNOTATIONS_FILE_PATH'] = annotations_file_path + env['CM_DATASET_OPENIMAGES_VALIDATION_ANNOTATIONS_FILE_PATH'] = annotations_file_path if env.get("CM_DATASET_OPENIMAGES_CUSTOM_ANNOTATIONS",'') == "yes": annotations_file_src = env['CM_DATASET_OPENIMAGES_ANNOTATIONS_FILE_PATH'] shutil.copy(annotations_file_src, env['CM_DATASET_ANNOTATIONS_DIR_PATH']) @@ -77,6 +78,7 @@ def postprocess(i): env['CM_DATASET_OPENIMAGES_PATH_ROOT'] = env['CM_DATASET_PATH_ROOT'] else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'calibration', 'data') + env['CM_OPENIMAGES_CALIBRATION_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'calibration', 'data') env['CM_CALIBRATION_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install') annotations_file_path = os.path.join(env['CM_DATASET_ANNOTATIONS_DIR_PATH'], "openimages-calibration-mlperf.json") env['CM_DATASET_CALIBRATION_ANNOTATIONS_FILE_PATH'] = annotations_file_path diff --git a/script/get-generic-python-lib/_cm.json b/script/get-generic-python-lib/_cm.json index b3757091c1..cdfe55bade 100644 --- 
a/script/get-generic-python-lib/_cm.json +++ b/script/get-generic-python-lib/_cm.json @@ -567,6 +567,20 @@ "CM_PANDAS_VERSION" ] }, + "whl-url.#": { + "deps": [ + { + "tags": "download,file,_url.#", + "force_cache": "yes", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_GENERIC_PYTHON_PIP_URL" + } + } + ], + "env": { + "CM_TMP_PYTHON_PACKAGE_FORCE_INSTALL": "yes" + } + }, "path.#": { "env": { "CM_GENERIC_PYTHON_PIP_URL": "#" @@ -874,12 +888,26 @@ "deps": [ { "tags": "get,generic-python-lib,_package.networkx", - "enable_if_env": { - "CM_PYTHON_MINOR_VERSION": [ "7", "8" ] - } + "enable_if_env": { + "CM_PYTHON_MINOR_VERSION": [ "7", "8" ] + } } ] }, + "cxx11-abi": { + "env": { + } + }, + "torch,cxx11-abi": { + "env": { + "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/nightly/cpu-cxx11-abi" + } + }, + "package.torch,cxx11-abi": { + "env": { + "CM_GENERIC_PYTHON_PIP_INDEX_URL": "https://download.pytorch.org/whl/nightly/cpu-cxx11-abi" + } + }, "torch,pre": { "default_env": { "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torch" @@ -914,7 +942,6 @@ }, "torch_cuda": { "default_env": { - "CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS1": "torch" }, "deps": [ { @@ -926,7 +953,6 @@ ], "env": { "CM_GENERIC_PYTHON_PACKAGE_NAME": "torch", - "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL1": "https://download.pytorch.org/whl/<<>>", "CM_TORCH_VERSION_EXTRA": "CUDA" }, "new_env_keys": [ diff --git a/script/get-generic-python-lib/customize.py b/script/get-generic-python-lib/customize.py index 18ae0149aa..ea27ec6603 100644 --- a/script/get-generic-python-lib/customize.py +++ b/script/get-generic-python-lib/customize.py @@ -10,6 +10,7 @@ def preprocess(i): automation = i['automation'] run_script_input = i['run_script_input'] pip_version = env.get('CM_PIP_VERSION', '').strip().split('.') + package_name = env.get('CM_GENERIC_PYTHON_PACKAGE_NAME', '').strip() if package_name == '': return automation._available_variations({'meta':meta}) @@ -40,7 +41,7 @@ def preprocess(i): r = automation.run_native_script({'run_script_input':run_script_input, 'env':env, 'script_name':'uninstall_deps'}) if r['return']>0: return r - prepare_env_key = env['CM_GENERIC_PYTHON_PACKAGE_NAME'] + prepare_env_key = env.get('CM_GENERIC_PYTHON_PACKAGE_NAME', '') for x in ["-", "[", "]"]: prepare_env_key = prepare_env_key.replace(x,"_") @@ -78,6 +79,7 @@ def preprocess(i): # Check extra index URL extra_index_url = env.get('CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL','').strip() + if extra_index_url != '': # Check special cases if '${CM_TORCH_CUDA}' in extra_index_url: @@ -95,10 +97,6 @@ def preprocess(i): env['CM_GENERIC_PYTHON_PIP_EXTRA'] = extra - package_name = env.get('CM_GENERIC_PYTHON_PACKAGE_NAME', '').strip() - if package_name == '': - return automation._available_variations({'meta':meta}) - r = automation.run_native_script({'run_script_input':run_script_input, 'env':env, 'script_name':'install'}) if r['return']>0: return r diff --git a/script/get-generic-python-lib/detect-version.py b/script/get-generic-python-lib/detect-version.py index 8cd53515ae..86ab8adf0b 100644 --- a/script/get-generic-python-lib/detect-version.py +++ b/script/get-generic-python-lib/detect-version.py @@ -1,4 +1,5 @@ import os +import sys package_name = os.environ.get('CM_GENERIC_PYTHON_PACKAGE_NAME','') @@ -18,7 +19,7 @@ except Exception as e: error = format(e) - if error != '': + if error != '' and sys.version_info < (3, 9): try: import pkg_resources version = pkg_resources.get_distribution(package_name).version diff --git 
a/script/get-generic-python-lib/install.sh b/script/get-generic-python-lib/install.sh index 04243d9f4c..b79aa81463 100644 --- a/script/get-generic-python-lib/install.sh +++ b/script/get-generic-python-lib/install.sh @@ -35,6 +35,7 @@ if [[ ${CM_GENERIC_PYTHON_PACKAGE_NAME} == "tensorflow_old" ]]; then exit 0 fi fi + if [[ -n ${CM_GENERIC_PYTHON_PIP_URL} ]]; then cmd="${CM_PYTHON_BIN_WITH_PATH} -m pip install \"${CM_GENERIC_PYTHON_PIP_URL}\" ${CM_GENERIC_PYTHON_PIP_EXTRA}" echo $cmd diff --git a/script/get-generic-sys-util/README-extra.md b/script/get-generic-sys-util/README-extra.md new file mode 100644 index 0000000000..d8f0015ae8 --- /dev/null +++ b/script/get-generic-sys-util/README-extra.md @@ -0,0 +1,425 @@ +Please see [https://docs.mlcommons.org/cm4mlops/scripts/Detection-or-installation-of-tools-and-artifacts/get-generic-sys-util](https://docs.mlcommons.org/cm4mlops/scripts/Detection-or-installation-of-tools-and-artifacts/get-generic-sys-util) for the documentation of this CM script. + +# get-generic-sys-util +Below are the specific regexes and the expected output format for each command used to check versions. + +All commands have been tested and confirmed to work on Ubuntu. + +Format: + +## Utility name +`regex` + +`command to obtain version` + +command output + +---- + +## g++-9 +`^.*([0-9]+(\\.[0-9]+)+).*` + +`g++-9 --version` + +g++-9 (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
+Copyright (C) 2019 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ +## g++-11 +`^.*([0-9]+(\\.[0-9]+)+).*` + +`g++-11 --version` + +g++-11 (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+Copyright (C) 2021 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ +## g++-12 +`^.*([0-9]+(\\.[0-9]+)+).*` + +`g++-12 --version` + +g++-12 (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0
+Copyright (C) 2022 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ + +## gcc-9 +`^.*([0-9]+(\\.[0-9]+)+).*` + +`gcc-9 --version` + +gcc-9 (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0
+Copyright (C) 2019 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ +## gcc-11 +`^.*([0-9]+(\\.[0-9]+)+).*` + +`gcc-11 --version` + +gcc-11 (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+Copyright (C) 2021 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ + +## libgflags-dev +`([\d.]+)` + +`pkg-config --modversion gflags` + +2.2.2 + +## libglog-dev +`([\d.]+)` + +`pkg-config --modversion libglog` + +0.4.0 + +## libboost-all-dev +`([0-9]+(\w.[0-9]+)+)` + +`dpkg -s libboost-dev | grep 'Version'` + +Version: 1.74.0.3ubuntu7 + + +## libpng-dev +`([\d.]+)` + +`pkg-config --modversion libpng` + +1.6.37 + +## libre2-dev +`([\d.]+)` + +`pkg-config --modversion libre2` + +0.0.0 + +## libpci-dev +`([\d.]+)` + +`pkg-config --modversion libpci` + +3.7.0 + + +## libreadline_dev +`([\d.]+)` + +`pkg-config --modversion readline` + +8.1 + +## zlib +`([\d.]+)` + +`pkg-config --modversion zlib` + +1.2.11 + + +## libsqlite3_dev +`([\d.]+)` + +`pkg-config --modversion sqlite3` + +3.37.2 + +## libssl_dev +`OpenSSL\s+([\d.]+)` + +`openssl version` + +OpenSSL 3.0.2 15 Mar 2022 (Library: OpenSSL 3.0.2 15 Mar 2022) + +## libudev-dev +`([\d.]+)` + +`pkg-config --modversion libudev` + +249 + + +## libbz2_dev +`Version ([A-Za-z0-9]+(\.[A-Za-z0-9]+)+)` + +`bzcat --version` + +bzip2, a block-sorting file compressor. Version 1.0.8, 13-Jul-2019. + +## libev_dev +dpkg here should be fine as only apt install is supported +`Version ([A-Za-z0-9]+(\.[A-Za-z0-9]+)+)` + +`dpkg -s libev-dev | grep 'Version'` + +Version: 1:4.33-1 + +## libffi-dev +`([\d.]+)` + +`pkg-config --modversion libffi` + +3.4.2 + +## libffi_dev +`([\d.]+)` + +`pkg-config --modversion libffi` + +3.4.2 + +## libffi7 +`\d\.\d-[0-9]+` + +`dpkg -l libffi7 2>/dev/null | grep '^ii' | awk '{print $3}' || rpm -q libffi7 2>/dev/null || pacman -Q libffi7 2>/dev/null` + +3.3-5ubuntu1 + +## libffi8 +`\d\.\d\.\d-\d` + +`pkg-config --modversion libffi8"` + +3.4.2-4 + +## libgdbm_dev +dpkg here should be fine as only apt install is supported +`dpkg -s libgdbm-dev | grep 'Version'` + +`([\d]+\.[\d\.-]+)` + +Version: 1.23-1 + + +## libgmock +`([\d.]+)` + +`pkg-config --modversion libgmock` + +1.11.0 + +## liblzma_dev +`[A-Za-z]+\s\d\.\d\.\d` + +`xz --version` + +xz (XZ Utils) 5.2.5 +liblzma 5.2.5 + + +## libmpfr_dev +`([\d.]+)` + +`pkg-config --modversion mpfr` + +`4.1.0` + +## libncurses_dev +`([0-9]+(\.[0-9]+)+)` + +`ncurses5-config --version` + +6.3.20211021 + + + +## ninja-build +`([\d.]+)` + +`ninja --version` + +1.11.1 + +## md5sha1sum +`md5sum \(GNU coreutils\) ([\d.]+)` + +`md5sum --version` or `sha1sum --version` + +md5sum (GNU coreutils) 9.5 + +sha1sum (GNU coreutils) 9.5 + + +## nlohmann-json3-dev +`([\d.]+)` + +`pkg-config --modversion nlohmann_json` + +`3.10.5` + +## ntpdate +`([A-Za-z0-9]+(\.[A-Za-z0-9]+)+)` + +`dpkg -l ntpdate 2>/dev/null | grep ^ii | awk '{print $3}'` + +1:4.2.8p15+dfsg-1ubuntu2 + +## nvidia-cuda-toolkit +`release ([\d.]+)` + +`nvcc --version` + +nvcc: NVIDIA (R) Cuda compiler driver
+Copyright (c) 2005-2021 NVIDIA Corporation
+Built on Thu_Nov_18_09:45:25_PST_2021
+Cuda compilation tools, release 11.5, V11.5.119
+Build cuda_11.5.r11.5/compiler.30672275_0
+ + +## psmisc +`\(PSmisc\) ([\d.]+)` + +`pstree --version` + +pstree (PSmisc) 23.4 + +## rapidjson-dev +`([\d.]+)` + +`pkg-config --modversion RapidJSON` + +1.1.0 + +## cmake +`cmake version ([\d.]+)` + +`cmake --version` + +cmake version 3.30.4 + +## libnuma-dev +`([\d.]+)` + +`pkg-config --modversion numa` + +2.0.14 + + +## numactl +`([\d.]+)` + +`pkg-config --modversion numa` + +2.0.14 + +## wget +`Wget\s*([\d.]+)` + +`wget --version` + +GNU Wget 1.21.2 built on linux-gnu. + +## screen +`Screen version ([\d.]+)` + +`screen --version` + +Screen version 4.00.020 (FAU) 23-Oct-06 + +## xz +`xz \(XZ Utils\) ([\d.]+)` + +`xz --version` + +xz (XZ Utils) 5.2.5 +liblzma 5.2.5 + +## VIM +`VIM - Vi IMproved ([\d.]+` + +`vim --version` + +VIM - Vi IMproved 9.0 (2022 Jun 28, compiled Aug 3 2024 14:50:46) + +## rsync +`rsync\s+version\s+([\d.]+)` + +`rsync --version` + +rsync version 3.2.7 protocol version 31 + +## sox +`sox:\s+SoX\s+v([\d.]+)` + +`sox --version` + +sox: SoX v14.4.2 + + +## systemd +`systemd ([\d]+)` + +`systemctl --version` + +systemd 249 (249.11-0ubuntu3.12) + +## tk-dev +Probably fine to use `dpkg` here as only installation supported is for ubuntu + +`([0-9]+(\.[0-9]+)+)` + +`dpkg -s tk-dev | grep Version` + +Version: 8.6.11+1build2 + + +## transmission +`transmission-daemon ([\d.]+)` + +`transmission-daemon --version` + +transmission-daemon 3.00 (bb6b5a062e) + + +## wkhtmltopdf +`wkhtmltopdf ([\d.]+)` + +`wkhtmltopdf --version` + +wkhtmltopdf 0.12.6 + +## systemd +`systemd ([\d]+)` + +`systemd --version` + +systemd 255 (255.4-1ubuntu8.4) + + +## dmidecode +`([\d.]+)` + +`dmidecode --version` + +3.3 + +## git-lfs +`git-lfs/([\d.]+)` + +`git-lfs --version` + +git-lfs/3.4.1 (GitHub; linux arm64; go 1.22.2) + +## zlib1g +`([\d.]+)` + +`pkg-config --modversion zlib` + +1.2.11 + +## zlib1g_dev +`([\d.]+)` + +`pkg-config --modversion zlib` + +1.2.11 diff --git a/script/get-generic-sys-util/README.md b/script/get-generic-sys-util/README.md deleted file mode 100644 index b1fc1a160e..0000000000 --- a/script/get-generic-sys-util/README.md +++ /dev/null @@ -1 +0,0 @@ -Please see [https://docs.mlcommons.org/cm4mlops/scripts/Detection-or-installation-of-tools-and-artifacts/get-generic-sys-util](https://docs.mlcommons.org/cm4mlops/scripts/Detection-or-installation-of-tools-and-artifacts/get-generic-sys-util) for the documentation of this CM script. 
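To illustrate how the regex/command pairs documented in README-extra.md above are meant to be used, here is a minimal, self-contained sketch of the version-detection idea. It is not part of the CM script itself; the names `cmd`, `version_re` and `group_number` are illustrative stand-ins for what the script wires up through `CM_SYS_UTIL_VERSION_CMD`, `CM_SYS_UTIL_VERSION_RE` and `CM_TMP_VERSION_DETECT_GROUP_NUMBER`.

```python
# Minimal sketch of regex-based version detection, assuming cmake is installed.
# The real script writes the command output to tmp-ver.out and parses it later;
# here everything is done in one place for clarity.
import re
import subprocess

cmd = ["cmake", "--version"]            # e.g. CM_SYS_UTIL_VERSION_CMD = "cmake --version"
version_re = r"cmake version ([\d.]+)"  # regex from the table above
group_number = 1                        # capture group holding the version string

# Capture both stdout and stderr, since some utilities print their version to stderr
result = subprocess.run(cmd, capture_output=True, text=True)
output = result.stdout + result.stderr

match = re.search(version_re, output)
version = match.group(group_number) if match else "undetected"
print(version)                          # e.g. "3.30.4"
```

Reading stderr as well as stdout mirrors the `CM_SYS_UTIL_VERSION_CMD_USE_ERROR_STREAM` option introduced in this change for utilities such as `transmission-daemon`, which emit their version on the error stream.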
diff --git a/script/get-generic-sys-util/_cm.json b/script/get-generic-sys-util/_cm.json index 90fba4fc98..100dfbc4b8 100644 --- a/script/get-generic-sys-util/_cm.json +++ b/script/get-generic-sys-util/_cm.json @@ -20,6 +20,10 @@ "CM_GENERIC_SYS_UTIL_INSTALL_NEEDED": "no", "CM_SYS_UTIL_VERSION_CMD": "" }, + "input_mapping": { + "ignore_missing": "CM_GENERIC_SYS_UTIL_IGNORE_MISSING_PACKAGE", + "fail_safe": "CM_TMP_FAIL_SAFE" + }, "tags": [ "get", "sys-util", @@ -31,15 +35,15 @@ "detect": { "group": "mode", "default": true, - "names": [ - "detect-sys-util" - ], "env": { "CM_GENERIC_SYS_UTIL_RUN_MODE": "detect" }, "prehook_deps": [ { "tags": "get,generic-sys-util,_install", + "force_env_keys": [ + "CM_TMP_FAIL_SAFE" + ], "inherit_variation_tags": true, "names": [ "install-sys-util" @@ -60,11 +64,18 @@ "group": "mode", "env": { "CM_GENERIC_SYS_UTIL_RUN_MODE": "install" - } + }, + "new_env_keys": [ + "CM_TMP_GENERIC_SYS_UTIL_PACKAGE_INSTALL_IGNORED", + "CM_GET_GENERIC_SYS_UTIL_INSTALL_FAILED" + ] }, "cmake": { "env": { - "CM_SYS_UTIL_NAME": "cmake" + "CM_SYS_UTIL_NAME": "cmake", + "CM_SYS_UTIL_VERSION_CMD": "cmake --version", + "CM_SYS_UTIL_VERSION_RE": "cmake version ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "cmake": { @@ -73,11 +84,15 @@ "dnf": "cmake", "yum": "cmake" } - } + }, + "new_env_keys": ["CM_CMAKE_VERSION"] }, "dmidecode": { "env": { - "CM_SYS_UTIL_NAME": "dmidecode" + "CM_SYS_UTIL_NAME": "dmidecode", + "CM_SYS_UTIL_VERSION_CMD": "dmidecode --version", + "CM_SYS_UTIL_VERSION_RE": "([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "dmidecode": { @@ -86,60 +101,83 @@ "dnf": "dmidecode", "yum": "dmidecode" } - } + }, + "new_env_keys": ["CM_DMIDECODE_VERSION"] }, "g++-11": { "env": { - "CM_SYS_UTIL_NAME": "g++11" + "CM_SYS_UTIL_NAME": "g++11", + "CM_SYS_UTIL_VERSION_CMD": "g++-11 --version", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0, + "CM_GENERIC_SYS_UTIL_IGNORE_VERSION_DETECTION_FAILURE": "yes" }, "state": { "g++11": { "apt": "g++-11", "dnf": "gcc-toolset-11-gcc-c++" } - } + }, + "new_env_keys": ["CM_GPP11_VERSION"] }, "g++-12": { "env": { - "CM_SYS_UTIL_NAME": "g++12" + "CM_SYS_UTIL_NAME": "g++12", + "CM_SYS_UTIL_VERSION_CMD": "g++-12 --version", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0, + "CM_GENERIC_SYS_UTIL_IGNORE_VERSION_DETECTION_FAILURE": "yes" }, "state": { "g++12": { "apt": "g++-12", "dnf": "gcc-toolset-12-gcc-c++" } - } + }, + "new_env_keys": ["CM_GPP12_VERSION"] }, "g++-9": { "env": { - "CM_SYS_UTIL_NAME": "g++9" + "CM_SYS_UTIL_NAME": "g++9", + "CM_SYS_UTIL_VERSION_CMD": "g++-9 --version", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "g++9": { "apt": "g++-9", "dnf": "gcc-toolset-9-gcc-c++" } - } + }, + "new_env_keys": ["CM_GPP9_VERSION"] }, "gcc-11": { "env": { - "CM_SYS_UTIL_NAME": "gcc11" + "CM_SYS_UTIL_NAME": "gcc11", + "CM_SYS_UTIL_VERSION_CMD": "gcc-11 --version", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "gcc11": { "apt": "gcc-11" } - } + }, + "new_env_keys": ["CM_GCC11_VERSION"] }, "gcc-9": { "env": { - "CM_SYS_UTIL_NAME": "gcc9" + "CM_SYS_UTIL_NAME": "gcc9", + "CM_SYS_UTIL_VERSION_CMD": "gcc-9 --version", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "gcc9": { "apt": "gcc-9" } - 
} + }, + "new_env_keys": ["CM_GCC9_VERSION"] }, "gflags-dev": { "env": { @@ -152,11 +190,15 @@ "dnf": "gflags-devel", "yum": "gflags-devel" } - } + }, + "new_env_keys": ["CM_GFLAGS_DEV_VERSION"] }, "git-lfs": { "env": { - "CM_SYS_UTIL_NAME": "git-lfs" + "CM_SYS_UTIL_NAME": "git-lfs", + "CM_SYS_UTIL_VERSION_CMD": "git-lfs --version", + "CM_SYS_UTIL_VERSION_RE": "git-lfs\\/([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "git-lfs": { @@ -165,7 +207,8 @@ "dnf": "git-lfs", "yum": "git-lfs" } - } + }, + "new_env_keys": ["CM_GIT_LFS_VERSION"] }, "glog-dev": { "env": { @@ -178,11 +221,15 @@ "dnf": "glog-devel", "yum": "glog-devel" } - } + }, + "new_env_keys": ["CM_GLOG_DEV_VERSION"] }, "libboost-all-dev": { "env": { - "CM_SYS_UTIL_NAME": "libboost-all-dev" + "CM_SYS_UTIL_NAME": "libboost-all-dev", + "CM_SYS_UTIL_VERSION_CMD": "dpkg -s libboost-dev | grep 'Version'", + "CM_SYS_UTIL_VERSION_RE": "([0-9]+(\\.[0-9]+)+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "libboost-all-dev": { @@ -191,11 +238,15 @@ "dnf": "boost-devel", "yum": "boost-devel" } - } + }, + "new_env_keys": ["CM_LIBBOOST_ALL_DEV_VERSION"] }, "libbz2-dev": { "env": { - "CM_SYS_UTIL_NAME": "libbz2_dev" + "CM_SYS_UTIL_NAME": "libbz2_dev", + "CM_SYS_UTIL_VERSION_CMD_OVERRIDE": "bzcat --version 2>&1 | grep bzip > tmp-ver.out", + "CM_SYS_UTIL_VERSION_RE": "([0-9]+(\\.[0-9]+)+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "libbz2_dev": { @@ -205,27 +256,33 @@ "yum": "libbzip2-devel", "zlib-devel": "libbz2-devel" } - } + }, + "new_env_keys": ["CM_LIBBZ2_DEV_VERSION"] }, "libev-dev": { "env": { - "CM_SYS_UTIL_NAME": "libev_dev" + "CM_SYS_UTIL_NAME": "libev_dev", + "CM_SYS_UTIL_VERSION_CMD": "dpkg -s libev-dev | grep 'Version'", + "CM_SYS_UTIL_VERSION_RE": "([\\d:]+\\.[\\d\\.-]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "libev_dev": { "apt": "libev-dev" } - } + }, + "new_env_keys": ["CM_LIBEV_DEV_VERSION"] }, "libffi": { "env": { "CM_SYS_UTIL_NAME": "libffi" }, "state": { - "libffi7": { + "libffi": { "apt": "libffi" } - } + }, + "new_env_keys": ["CM_LIBFFI_VERSION"] }, "libffi-dev": { "env": { @@ -238,17 +295,22 @@ "dnf": "libffi-devel", "yum": "libffi-devel" } - } + }, + "new_env_keys": ["CM_LIBFFI_DEV_VERSION"] }, "libffi7": { "env": { - "CM_SYS_UTIL_NAME": "libffi7" + "CM_SYS_UTIL_NAME": "libffi7", + "CM_SYS_UTIL_VERSION_CMD": "dpkg -l libffi7 2>/dev/null | grep '^ii' | awk '{print $3}' || rpm -q libffi7 2>/dev/null || pacman -Q libffi7 2>/dev/null", + "CM_SYS_UTIL_VERSION_RE": "\\d\\.\\d-[0-9]+", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "libffi7": { "apt": "libffi7" } - } + }, + "new_env_keys": ["CM_LIBFFI7_VERSION"] }, "libffi8": { "env": { @@ -258,17 +320,22 @@ "libffi8": { "apt": "libffi8" } - } + }, + "new_env_keys": ["CM_LIBFFI8_VERSION"] }, "libgdbm-dev": { "env": { - "CM_SYS_UTIL_NAME": "libgdbm_dev" + "CM_SYS_UTIL_NAME": "libgdbm_dev", + "CM_SYS_UTIL_VERSION_CMD": "dpkg -s libgdbm-dev | grep 'Version'", + "CM_SYS_UTIL_VERSION_RE": "([\\d]+\\.[\\d\\.-]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "libgdbm_dev": { "apt": "libgdbm-dev" } - } + }, + "new_env_keys": ["CM_LIBGDBM_DEV_VERSION"] }, "libgmock-dev": { "env": { @@ -281,17 +348,22 @@ "dnf": "gmock-devel", "yum": "gmock-devel" } - } + }, + "new_env_keys": ["CM_LIBGMOCK_DEV_VERSION"] }, "liblzma-dev": { "env": { - "CM_SYS_UTIL_NAME": "liblzma_dev" + "CM_SYS_UTIL_NAME": "liblzma_dev", + "CM_SYS_UTIL_VERSION_CMD": "xz --version", + "CM_SYS_UTIL_VERSION_RE": "(\\d(\\.\\d)+)", + 
"CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "liblzma_dev": { "apt": "liblzma-dev" } - } + }, + "new_env_keys": ["CM_LIBLZMA_DEV_VERSION"] }, "libmkl-dev": { "env": { @@ -304,7 +376,8 @@ "dnf": "", "yum": "" } - } + }, + "new_env_keys": ["CM_LIBMKL_DEV_VERSION"] }, "libmpfr-dev": { "env": { @@ -318,11 +391,16 @@ "yum": "mpfr-devel.x86_64", "zypper": "mpfr-devel" } - } + }, + "new_env_keys": ["CM_LIBMPFR_DEV_VERSION"] }, "libncurses-dev": { "env": { - "CM_SYS_UTIL_NAME": "libncurses_dev" + "CM_SYS_UTIL_NAME": "libncurses_dev", + "CM_SYS_UTIL_VERSION_CMD": "ncurses5-config --version", + "CM_SYS_UTIL_VERSION_RE": "([0-9]+(\\.[0-9]+)+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1, + "CM_GENERIC_SYS_UTIL_IGNORE_VERSION_DETECTION_FAILURE": "yes" }, "state": { "libncurses_dev": { @@ -330,7 +408,8 @@ "dnf": "libncurses-devel", "yum": "libncurses-devel" } - } + }, + "new_env_keys": ["CM_LIBNCURSES_DEV_VERSION"] }, "libnuma-dev": { "env": { @@ -343,7 +422,8 @@ "dnf": "numactl-libs", "yum": "numactl-libs" } - } + }, + "new_env_keys": ["CM_LIBNUMA_DEV_VERSION"] }, "libpci-dev": { "env": { @@ -356,7 +436,8 @@ "dnf": "pciutils-devel", "yum": "pciutils-devel" } - } + }, + "new_env_keys": ["CM_LIBPCI_DEV_VERSION"] }, "libpng-dev": { "env": { @@ -369,7 +450,8 @@ "dnf": "libpng-devel", "yum": "libpng-devel" } - } + }, + "new_env_keys": ["CM_LIBPNG_DEV_VERSION"] }, "libre2-dev": { "env": { @@ -382,7 +464,8 @@ "dnf": "libre-devel", "yum": "libre-devel" } - } + }, + "new_env_keys": ["CM_LIBRE2_DEV_VERSION"] }, "libreadline-dev": { "env": { @@ -392,9 +475,10 @@ "libreadline_dev": { "apt": "libreadline-dev", "dnf": "libreadline-devel", - "yum": "lireadline-devel" + "yum": "readline-devel" } - } + }, + "new_env_keys": ["CM_LIBREADLINE_DEV_VERSION"] }, "libsqlite3-dev": { "env": { @@ -404,11 +488,15 @@ "libsqlite3_dev": { "apt": "libsqlite3-dev" } - } + }, + "new_env_keys": ["CM_LIBSQLITE3_DEV_VERSION"] }, "libssl-dev": { "env": { - "CM_SYS_UTIL_NAME": "libssl_dev" + "CM_SYS_UTIL_NAME": "libssl_dev", + "CM_SYS_UTIL_VERSION_CMD": "openssl version", + "CM_SYS_UTIL_VERSION_RE": "OpenSSL\\s+([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "libssl_dev": { @@ -417,7 +505,8 @@ "dnf": "libssl-devel", "yum": "libssl-devel" } - } + }, + "new_env_keys": ["CM_LIBSSL_DEV_VERSION"] }, "libudev-dev": { "env": { @@ -430,7 +519,8 @@ "dnf": "libudev-devl", "yum": "libudev-devel" } - } + }, + "new_env_keys": ["CM_LIBUDEV_DEV_VERSION"] }, "linux-tools": { "deps": [ @@ -445,22 +535,30 @@ "linux-tools": { "apt": "linux-tools-<<>>" } - } + }, + "new_env_keys": ["CM_LINUX_TOOLS_VERSION"] }, "md5sha1sum": { "env": { - "CM_SYS_UTIL_NAME": "md5sha1sum" + "CM_SYS_UTIL_NAME": "md5sha1sum", + "CM_SYS_UTIL_VERSION_CMD": "md5sum --version | grep sha1sum", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "md5sha1sum": { "apt": "", "brew": "md5sha1sum" } - } + }, + "new_env_keys": ["CM_MD5SHA1SUM_VERSION"] }, "ninja-build": { "env": { - "CM_SYS_UTIL_NAME": "ninja-build" + "CM_SYS_UTIL_NAME": "ninja-build", + "CM_SYS_UTIL_VERSION_CMD": "ninja --version", + "CM_SYS_UTIL_VERSION_RE": "([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "ninja-build": { @@ -470,7 +568,8 @@ "yum": "ninja-build", "zypper": "ninja-build" } - } + }, + "new_env_keys": ["CM_NINJA_BUILD_VERSION"] }, "nlohmann-json3-dev": { "env": { @@ -479,9 +578,10 @@ "state": { "nlohmann_json3_dev": { "apt": "nlohmann-json3-dev", - "dnf": "nlohmann-json-dev" + "dnf": 
"nlohmann-json-devel" } - } + }, + "new_env_keys": ["CM_NLOHMANN_JSON3_DEV_VERSION"] }, "ntpdate": { "env": { @@ -494,7 +594,8 @@ "dnf": "ntpdate", "yum": "ntpdate" } - } + }, + "new_env_keys": ["CM_NTPDATE_VERSION"] }, "numactl": { "deps": [ @@ -513,7 +614,8 @@ } ], "env": { - "CM_SYS_UTIL_NAME": "numactl" + "CM_SYS_UTIL_NAME": "numactl", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "numactl": { @@ -521,11 +623,15 @@ "dnf": "numactl-devel", "yum": "numactl-devel" } - } + }, + "new_env_keys": ["CM_NUMACTL_VERSION"] }, "nvidia-cuda-toolkit": { "env": { - "CM_SYS_UTIL_NAME": "nvidia-cuda-toolkit" + "CM_SYS_UTIL_NAME": "nvidia-cuda-toolkit", + "CM_SYS_UTIL_VERSION_CMD": "nvcc --version", + "CM_SYS_UTIL_VERSION_RE": "release ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "nvidia-cuda-toolkit": { @@ -534,6 +640,22 @@ "dnf": "nvidia-cuda-toolkit", "yum": "nvidia-cuda-toolkit" } + }, + "new_env_keys": ["CM_NVIDIA_CUDA_TOOLKIT_VERSION"] + }, + "pkg-config": { + "env": { + "CM_SYS_UTIL_NAME": "pkg_config", + "CM_SYS_UTIL_VERSION_RE": "\\b(\\d+\\.\\d+(?:\\.\\d+)?)\\b", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 + }, + "state": { + "pkg_config": { + "apt": "pkg-config", + "brew": "pkg-config", + "dnf": "pkg-config", + "yum": "pkg-config" + } } }, "psmisc": { @@ -547,7 +669,8 @@ "dnf": "psmisc", "yum": "psmisc" } - } + }, + "new_env_keys": ["CM_PSMISC_VERSION"] }, "rapidjson-dev": { "env": { @@ -560,12 +683,15 @@ "dnf": "rapidjson-devel", "yum": "rapidjson-devel" } - } + }, + "new_env_keys": ["CM_RAPIDJSON_DEV_VERSION"] }, "rsync": { "env": { "CM_SYS_UTIL_NAME": "rsync", - "CM_SYS_UTIL_CHECK_CMD": "rsync --version" + "CM_SYS_UTIL_VERSION_CMD": "rsync --version", + "CM_SYS_UTIL_VERSION_RE": "rsync\\s+version\\s+([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "rsync": { @@ -575,11 +701,15 @@ "yum": "rsync", "zypper": "rsync" } - } + }, + "new_env_keys": ["CM_RSYNC_VERSION"] }, "screen": { "env": { - "CM_SYS_UTIL_NAME": "screen" + "CM_SYS_UTIL_NAME": "screen", + "CM_SYS_UTIL_VERSION_CMD": "screen --version", + "CM_SYS_UTIL_VERSION_RE": "Screen version ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "screen": { @@ -589,11 +719,15 @@ "yum": "screen", "zypper": "rsync" } - } + }, + "new_env_keys": ["CM_SCREEN_VERSION"] }, "sox": { "env": { - "CM_SYS_UTIL_NAME": "sox" + "CM_SYS_UTIL_NAME": "sox", + "CM_SYS_UTIL_VERSION_CMD": "sox --version", + "CM_SYS_UTIL_VERSION_RE": "sox:\\s+SoX\\s+v([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "sox": { @@ -601,11 +735,15 @@ "brew": "sox", "dnf": "sox" } - } + }, + "new_env_keys": ["CM_SOX_VERSION"] }, "systemd": { "env": { - "CM_SYS_UTIL_NAME": "systemd" + "CM_SYS_UTIL_NAME": "systemd", + "CM_SYS_UTIL_VERSION_CMD": "systemctl --version", + "CM_SYS_UTIL_VERSION_RE": "systemd ([\\d]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "systemd": { @@ -614,21 +752,30 @@ "dnf": "systemd", "yum": "systemd" } - } + }, + "new_env_keys": ["CM_SYSTEMD_VERSION"] }, "tk-dev": { "env": { - "CM_SYS_UTIL_NAME": "tk_dev" + "CM_SYS_UTIL_NAME": "tk_dev", + "CM_SYS_UTIL_VERSION_CMD": "dpkg -s tk-dev | grep Version", + "CM_SYS_UTIL_VERSION_RE": "([0-9]+(\\.[0-9]+)+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "tk_dev": { "apt": "tk-dev" } - } + }, + "new_env_keys": ["CM_TK_DEV_VERSION"] }, "transmission": { "env": { - "CM_SYS_UTIL_NAME": "transmission" + "CM_SYS_UTIL_NAME": "transmission", + "CM_SYS_UTIL_VERSION_CMD": "transmission-daemon --version", + "CM_SYS_UTIL_VERSION_RE": 
"transmission-daemon ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0, + "CM_SYS_UTIL_VERSION_CMD_USE_ERROR_STREAM": "yes" }, "state": { "transmission": { @@ -637,13 +784,15 @@ "dnf": "transmission-daemon", "yum": "transmission-daemon" } - } + }, + "new_env_keys": ["CM_TRANSMISSION_VERSION"] }, "wget": { "env": { "CM_SYS_UTIL_NAME": "wget", "CM_SYS_UTIL_VERSION_CMD": "wget --version", - "CM_SYS_UTIL_VERSION_RE": "Wget\\s*([\\d.]+)" + "CM_SYS_UTIL_VERSION_RE": "Wget\\s*([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "wget": { @@ -658,27 +807,53 @@ "wkhtmltopdf": { "env": { "CM_SYS_UTIL_NAME": "wkhtmltopdf", - "CM_SYS_UTIL_CHECK_CMD": "wkhtmltopdf --version" + "CM_SYS_UTIL_VERSION_CMD": "wkhtmltopdf --version", + "CM_SYS_UTIL_VERSION_RE": "wkhtmltopdf ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 }, "state": { "wkhtmltopdf": { "apt": "wkhtmltopdf", "brew": "wkhtmltopdf" } - } + }, + "new_env_keys": ["CM_WKHTMLTOPDF_VERSION"] + }, + "vim-common": { + "env": { + "CM_SYS_UTIL_NAME": "vim_common", + "CM_SYS_UTIL_VERSION_CMD": "vim --version", + "CM_SYS_UTIL_VERSION_RE": "VIM - Vi IMproved ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 0 + }, + "state": { + "vim_common": { + "apt": "vim-common", + "brew": "vim", + "dnf": "vim-common", + "yum": "vim-common", + "choco": "vim" + } + }, + "new_env_keys": ["CM_VIM_COMMON_VERSION"] }, "xz": { "env": { - "CM_SYS_UTIL_NAME": "xz" + "CM_SYS_UTIL_NAME": "xz", + "CM_SYS_UTIL_VERSION_CMD": "xz --version", + "CM_SYS_UTIL_VERSION_RE": "xz \\(XZ Utils\\) ([\\d.]+)", + "CM_TMP_VERSION_DETECT_GROUP_NUMBER": 1 }, "state": { "xz": { "apt": "xz-utils", "brew": "xz", "dnf": "xz", - "yum": "xz" + "yum": "xz", + "choco": "xz" } - } + }, + "new_env_keys": ["CM_XZ_VERSION"] }, "zlib": { "env": { @@ -686,9 +861,11 @@ }, "state": { "zlib": { - "apt": "zlib1g" + "apt": "zlib1g", + "choco": "zlib" } - } + }, + "new_env_keys": ["CM_ZLIB_VERSION"] }, "zlib1g-dev": { "env": { @@ -701,7 +878,46 @@ "yum": "zlib-devel", "zypper": "zlib-devel" } - } + }, + "new_env_keys": ["CM_ZLIB1G_DEV_VERSION"] } + }, + "tests": { + "run_inputs": [ + { + "docker": "yes", + "docker_os": "rhel", + "docker_os_version": "9", + "test-all-variations": "yes", + "env": { + "CM_TMP_FAIL_SAFE": "yes" + }, + "ignore_missing": "yes" + }, + { + "docker": "yes", + "docker_os": "ubuntu", + "docker_os_version": "20.04", + "test-all-variations": "yes", + "ignore_missing": "yes", + "fail_safe": "yes" + }, + { + "docker": "yes", + "docker_os": "ubuntu", + "docker_os_version": "22.04", + "test-all-variations": "yes", + "ignore_missing": "yes", + "fail_safe": "yes" + }, + { + "docker": "yes", + "docker_os": "ubuntu", + "docker_os_version": "24.04", + "test-all-variations": "yes", + "ignore_missing": "yes", + "fail_safe": "yes" + } + ] } } diff --git a/script/get-generic-sys-util/customize.py b/script/get-generic-sys-util/customize.py index 648f139159..1bf2a31f60 100644 --- a/script/get-generic-sys-util/customize.py +++ b/script/get-generic-sys-util/customize.py @@ -14,23 +14,37 @@ def preprocess(i): if env.get('CM_SYS_UTIL_VERSION_CMD', '') != '' and env.get('CM_SYS_UTIL_CHECK_CMD', '') == '': env['CM_SYS_UTIL_CHECK_CMD'] = env['CM_SYS_UTIL_VERSION_CMD'] + if env.get('CM_GENERIC_SYS_UTIL_RUN_MODE', '') == "install": + i['run_script_input']['script_name'] = "install" + if env.get('CM_GENERIC_SYS_UTIL_RUN_MODE', '') == "detect": - if env.get('CM_SYS_UTIL_VERSION_CMD', '') != '': + if env.get('CM_SYS_UTIL_VERSION_CMD', '') != '' or env.get('CM_SYS_UTIL_VERSION_CMD_OVERRIDE', '') != 
'': r = automation.run_native_script({'run_script_input':i['run_script_input'], 'env':env, 'script_name':'detect'}) - if r['return'] > 0: #detection failed, do install via prehook_deps + if r['return'] != 0: #detection failed, do install via prehook_deps + print("detection failed, going for installation") env['CM_GENERIC_SYS_UTIL_INSTALL_NEEDED'] = "yes" return {'return': 0} else: #detection is successful, no need to install + #print("detection success") env['CM_SYS_UTIL_INSTALL_CMD'] = "" return {'return': 0} else: #No detction command available, just install + #print("No detection possible, going for installation") env['CM_GENERIC_SYS_UTIL_INSTALL_NEEDED'] = "yes" return {'return': 0} # Only "install" mode reaches here pm = env.get('CM_HOST_OS_PACKAGE_MANAGER') + util = env.get('CM_SYS_UTIL_NAME', '') + if util == '': + return {'return': 1, 'error': 'Please select a variation specifying the sys util name'} + + package = state.get(util) + package_name = None + if package and pm: + package_name = package.get(pm) - if os_info['platform'] == 'windows': + if os_info['platform'] == 'windows' and not package_name: print ('') print ('WARNING: for now skipping get-generic-sys-util on Windows ...') print ('') @@ -40,19 +54,17 @@ def preprocess(i): if not pm: return {'return': 1, 'error': 'Package manager not detected for the given OS'} - util = env.get('CM_SYS_UTIL_NAME', '') - if util == '': - return {'return': 1, 'error': 'Please select a variation specifying the sys util name'} - - - package = state.get(util) if not package: - return {'return': 1, 'error': 'No package name specified for {} and util name {}'.format(pm, util)} + return {'return': 1, 'error': f'No package name specified for {util} in the meta'} - package_name = package.get(pm) if not package_name: - return {'return': 1, 'error': 'No package name specified for {} and util name {}'.format(pm, util)} + if str(env.get('CM_GENERIC_SYS_UTIL_IGNORE_MISSING_PACKAGE', '')).lower() in [ "1", "true", "yes" ]: + print(f"WARNING: No package name specified for {pm} and util name {util}. 
Ignoring it...") + env['CM_TMP_GENERIC_SYS_UTIL_PACKAGE_INSTALL_IGNORED'] = 'yes' + return {'return': 0} + else: + return {'return': 1, 'error': f'No package name specified for {pm} and util name {util}'} if util == "libffi": if env.get("CM_HOST_OS_FLAVOR", "") == "ubuntu": @@ -95,47 +107,54 @@ def preprocess(i): return {'return':0} + + def detect_version(i): env = i['env'] version_env_key = f"CM_{env['CM_SYS_UTIL_NAME'].upper()}_VERSION" version_check_re = env.get('CM_SYS_UTIL_VERSION_RE', '') + group_number = env.get('CM_TMP_VERSION_DETECT_GROUP_NUMBER', 1) + # Confirm that the regex pattern and file are present if version_check_re == '' or not os.path.exists("tmp-ver.out"): version = "undetected" - else: r = i['automation'].parse_version({'match_text': version_check_re, - 'group_number': 1, + 'group_number': group_number, 'env_key': version_env_key, 'which_env': env}) + if r['return'] >0: return r version = r['version'] + print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) + + return {'return': 0, 'version': version} - print (i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return':0, 'version':version} def postprocess(i): env = i['env'] version_env_key = f"CM_{env['CM_SYS_UTIL_NAME'].upper()}_VERSION" - if env.get('CM_SYS_UTIL_VERSION_CMD', '') != '' and (env['CM_GENERIC_SYS_UTIL_RUN_MODE'] == "install" or env.get(version_env_key, '') == '') : + if (env.get('CM_SYS_UTIL_VERSION_CMD', '') != '' or env.get('CM_SYS_UTIL_VERSION_CMD_OVERRIDE', '') != '') and env.get(version_env_key, '') == '' and str(env.get('CM_TMP_GENERIC_SYS_UTIL_PACKAGE_INSTALL_IGNORED', '')).lower() not in ["yes", "1", "true"] and env.get('CM_GET_GENERIC_SYS_UTIL_INSTALL_FAILED', '') != 'yes': automation = i['automation'] + r = automation.run_native_script({'run_script_input':i['run_script_input'], 'env':env, 'script_name':'detect'}) - if r['return'] > 0: - return r + if r['return'] > 0 and str(env.get('CM_GENERIC_SYS_UTIL_IGNORE_VERSION_DETECTION_FAILURE', '')).lower() not in [ "1", "yes", "true" ]: + return {'return': 1, 'error': 'Version detection failed after installation. Please check the provided version command or use env.CM_GENERIC_SYS_UTIL_IGNORE_VERSION_DETECTION_FAILURE=yes to ignore the error.'} - r = detect_version(i) + elif r['return'] == 0: + r = detect_version(i) - if r['return'] >0: return r + if r['return'] >0: return r - version = r['version'] + version = r['version'] - env[version_env_key] = version + env[version_env_key] = version - #Not used now - env['CM_GENERIC_SYS_UTIL_'+env['CM_SYS_UTIL_NAME'].upper()+'_CACHE_TAGS'] = 'version-'+version + #Not used now + env['CM_GENERIC_SYS_UTIL_'+env['CM_SYS_UTIL_NAME'].upper()+'_CACHE_TAGS'] = 'version-'+version if env.get(version_env_key, '') == '': env[version_env_key] = "undetected" diff --git a/script/get-generic-sys-util/detect.sh b/script/get-generic-sys-util/detect.sh old mode 100644 new mode 100755 index 6f2e3759bd..2c35837992 --- a/script/get-generic-sys-util/detect.sh +++ b/script/get-generic-sys-util/detect.sh @@ -1,7 +1,21 @@ #!/bin/bash -if [[ -n "${CM_SYS_UTIL_VERSION_CMD}" ]]; then - ${CM_SYS_UTIL_VERSION_CMD} > tmp-ver.out +if [[ -n "${CM_SYS_UTIL_VERSION_CMD_OVERRIDE}" ]]; then + cmd="${CM_SYS_UTIL_VERSION_CMD_OVERRIDE}" + echo $cmd + eval $cmd + test $? -eq 0 || exit $? 
+else + if [[ -n "${CM_SYS_UTIL_VERSION_CMD}" ]]; then + if [[ "${CM_SYS_UTIL_VERSION_CMD_USE_ERROR_STREAM}" == "yes" ]]; then + # Redirect both stdout and stderr to tmp-ver.out + cmd="${CM_SYS_UTIL_VERSION_CMD} > tmp-ver.out 2>&1" + else + cmd="${CM_SYS_UTIL_VERSION_CMD} > tmp-ver.out" + fi + echo $cmd + eval $cmd + test $? -eq 0 || exit $? + fi fi -test $? -eq 0 || exit $? diff --git a/script/get-generic-sys-util/install.sh b/script/get-generic-sys-util/install.sh new file mode 100644 index 0000000000..145d0b7451 --- /dev/null +++ b/script/get-generic-sys-util/install.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Safe execution of a command stored in a variable +cmd="${CM_SYS_UTIL_INSTALL_CMD}" +echo "$cmd" + +# Execute the command and capture the exit status directly +if ! eval "$cmd"; then + echo "Command failed with status $?" + if [[ "${CM_TMP_FAIL_SAFE}" == 'yes' ]]; then + # Exit safely if fail-safe is enabled + echo "CM_GET_GENERIC_SYS_UTIL_INSTALL_FAILED=yes" > tmp-run-env.out + echo "Fail-safe is enabled, exiting with status 0" + exit 0 + else + # Otherwise exit with the actual error status + exit $? + fi +else + #echo "Command succeeded" + exit 0 +fi diff --git a/script/get-generic-sys-util/run.sh b/script/get-generic-sys-util/run.sh deleted file mode 100644 index 27c2f62867..0000000000 --- a/script/get-generic-sys-util/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -cmd=${CM_SYS_UTIL_INSTALL_CMD} -echo $cmd -eval $cmd -test $? -eq 0 || exit $? diff --git a/script/get-gh-actions-runner/_cm.yaml b/script/get-gh-actions-runner/_cm.yaml new file mode 100644 index 0000000000..d3d1a5fba0 --- /dev/null +++ b/script/get-gh-actions-runner/_cm.yaml @@ -0,0 +1,22 @@ +alias: get-gh-actions-runner +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +tags: +- get +- gh +- actions-runner +- runner-code +- runner +- code +- gh-actions-runner +uid: 5b005c5a76f242a7 +new_env_keys: + - CM_GH_ACTIONS_RUNNER_CODE_PATH +deps: + - tags: detect-os + - tags: download-and-extract,_extract,_url.https://github.com/actions/runner/releases/download/v2.320.0/actions-runner-linux-x64-2.320.0.tar.gz + force_cache: yes + extra_cache_tags: gh-actions-runner-code + env: + CM_DAE_FINAL_ENV_NAME: CM_GH_ACTIONS_RUNNER_CODE_PATH diff --git a/script/get-gh-actions-runner/customize.py b/script/get-gh-actions-runner/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/get-gh-actions-runner/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/get-git-repo/run.bat b/script/get-git-repo/run.bat index 583d324ee2..d00f32b15d 100644 --- a/script/get-git-repo/run.bat +++ b/script/get-git-repo/run.bat @@ -6,7 +6,7 @@ rem echo Cloning MLCommons from %CM_GIT_URL% with branch %CM_GIT_CHECKOUT% %CM_G rem git clone %CM_GIT_RECURSE_SUBMODULES% %CM_GIT_URL% %CM_GIT_DEPTH% inference rem cd inference rem git checkout -b "%CM_GIT_CHECKOUT%" -rem +rem rem Next line allows ERRORLEVEL inside if statements! 
setlocal enabledelayedexpansion @@ -18,9 +18,10 @@ set folder=%CM_GIT_CHECKOUT_FOLDER% if not exist "%CM_TMP_GIT_PATH%" ( - if exist %folder% ( - deltree %folder% + if exist "%folder%" ( + rmdir /S /Q "%folder%" rem Use rmdir instead of deltree ) + echo ****************************************************** echo Current directory: %CUR_DIR% echo. @@ -28,9 +29,12 @@ if not exist "%CM_TMP_GIT_PATH%" ( echo. echo "%CM_GIT_CLONE_CMD%" echo. + %CM_GIT_CLONE_CMD% IF !ERRORLEVEL! NEQ 0 EXIT !ERRORLEVEL! - cd %folder% + + cd "%folder%" + if not "%CM_GIT_SHA%" == "" ( echo. echo. @@ -39,9 +43,7 @@ if not exist "%CM_TMP_GIT_PATH%" ( ) ) else ( - - cd %folder% - + cd "%folder%" ) if not "%CM_GIT_SUBMODULES%" == "" ( @@ -62,6 +64,7 @@ if "%CM_GIT_PATCH%" == "yes" ( ) ) -cd %CUR_DIR% +cd "%CUR_DIR%" exit /b 0 + diff --git a/script/get-git-repo/run.sh b/script/get-git-repo/run.sh index 6cd8ef8fbd..2a7b0b51c9 100644 --- a/script/get-git-repo/run.sh +++ b/script/get-git-repo/run.sh @@ -6,7 +6,9 @@ SCRIPT_DIR=${CM_TMP_CURRENT_SCRIPT_PATH} folder=${CM_GIT_CHECKOUT_FOLDER} if [ ! -e "${CM_TMP_GIT_PATH}" ]; then - rm -rf ${folder} + cmd="rm -rf ${folder}" + echo $cmd + eval $cmd echo "******************************************************" echo "Current directory: ${CUR_DIR}" echo "" @@ -16,7 +18,13 @@ if [ ! -e "${CM_TMP_GIT_PATH}" ]; then echo "" ${CM_GIT_CLONE_CMD} - test $? -eq 0 || exit $? + rcode=$? + + if [ ! $rcode -eq 0 ]; then #try once more + rm -rf $folder + ${CM_GIT_CLONE_CMD} + test $? -eq 0 || exit $? + fi cd ${folder} diff --git a/script/get-ml-model-gptj/_cm.json b/script/get-ml-model-gptj/_cm.json index 396e0d595e..39cb88e488 100644 --- a/script/get-ml-model-gptj/_cm.json +++ b/script/get-ml-model-gptj/_cm.json @@ -253,6 +253,9 @@ "python", "python3" ] + }, + { + "tags": "get,generic-python-lib,_package.safetensors" } ] }, diff --git a/script/get-ml-model-gptj/customize.py b/script/get-ml-model-gptj/customize.py index ab475f0454..639efcde87 100644 --- a/script/get-ml-model-gptj/customize.py +++ b/script/get-ml-model-gptj/customize.py @@ -24,6 +24,8 @@ def preprocess(i): elif env.get('CM_TMP_ML_MODEL_PROVIDER', '') == 'nvidia': i['run_script_input']['script_name'] = 'run-nvidia' + if str(env.get('CM_DOCKER_DETACHED_MODE','')).lower() in ['yes', 'true', "1"]: + env['DOCKER_RUN_OPTS'] = "--rm --ipc=host --ulimit memlock=-1 --ulimit stack=67108864" gpu_arch = int(float(env['CM_CUDA_DEVICE_PROP_GPU_COMPUTE_CAPABILITY']) * 10) env['CM_GPU_ARCH'] = gpu_arch env['CM_TMP_REQUIRE_DOWNLOAD'] = 'no' diff --git a/script/get-ml-model-gptj/run-nvidia.sh b/script/get-ml-model-gptj/run-nvidia.sh index deba000ef9..27e5a675ce 100644 --- a/script/get-ml-model-gptj/run-nvidia.sh +++ b/script/get-ml-model-gptj/run-nvidia.sh @@ -12,10 +12,8 @@ cd ${CM_TENSORRT_LLM_CHECKOUT_PATH} make -C docker build test $? -eq 0 || exit $? 
-RUN_CMD="bash -c 'python3 scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '" -DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt" -export DOCKER_RUN_ARGS="$DOCKER_RUN_ARGS" -export RUN_CMD="$RUN_CMD" +export RUN_CMD="bash -c 'python3 scripts/build_wheel.py -a=${CM_GPU_ARCH} --clean --install --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized --model_dir=/mnt/models/GPTJ-6B/checkpoint-final --qformat=fp8 --kv_cache_dtype=fp8 '" +export DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt" make -C docker run LOCAL_USER=1 test $? -eq 0 || exit $? diff --git a/script/get-ml-model-huggingface-zoo/download_model.py b/script/get-ml-model-huggingface-zoo/download_model.py index 4e6e9c86e8..87f9b25aeb 100644 --- a/script/get-ml-model-huggingface-zoo/download_model.py +++ b/script/get-ml-model-huggingface-zoo/download_model.py @@ -28,8 +28,7 @@ model_filenames = model_filename.split(',') if ',' in model_filename else [model_filename] - # First must be model - base_model_filename = model_filenames[0] + base_model_filepath = None files = [] if full_subfolder!='': @@ -93,15 +92,17 @@ def list_hf_files(path): xrevision = None if revision == '' else revision xsubfolder = None if subfolder == '' else subfolder - hf_hub_download(repo_id=model_stub, + downloaded_path = hf_hub_download(repo_id=model_stub, subfolder=xsubfolder, filename=model_filename, - force_filename=model_filename, revision=xrevision, cache_dir=os.getcwd()) + print(downloaded_path) + if not base_model_filepath: + base_model_filepath = downloaded_path print ('') with open('tmp-run-env.out', 'w') as f: - f.write(f"CM_ML_MODEL_FILE_WITH_PATH={os.path.join(os.getcwd(),base_model_filename)}") + f.write(f"CM_ML_MODEL_FILE_WITH_PATH={base_model_filepath}") diff --git a/script/get-ml-model-huggingface-zoo/run.bat b/script/get-ml-model-huggingface-zoo/run.bat index 27155cb427..6a4faa929d 100644 --- a/script/get-ml-model-huggingface-zoo/run.bat +++ b/script/get-ml-model-huggingface-zoo/run.bat @@ -1,2 +1,3 @@ -%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\download_model.py +echo %CM_RUN_CMD% +call %CM_RUN_CMD% IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/get-ml-model-llama2/_cm.json b/script/get-ml-model-llama2/_cm.json index d64c6e004a..bd88acb53c 100644 --- a/script/get-ml-model-llama2/_cm.json +++ b/script/get-ml-model-llama2/_cm.json @@ -17,7 +17,8 @@ "new_env_keys": [ "CM_ML_MODEL_*", "LLAMA2_CHECKPOINT_PATH", - "CM_NVIDIA_TP_SIZE" + "CM_NVIDIA_TP_SIZE", + "CM_LLAMA2_FINAL_SAFE_TENSORS_PATH" ], "prehook_deps": [ { @@ -174,8 +175,8 @@ "tags": "get,preprocessed,dataset,openorca,_calibration,_mlc" }, { - "tags": "get,git,repo,_repo.https://github.com/mlcommons/submissions_inference_v4.1", - "extra_cache_tags": "inference,submissions", + "tags": "get,git,repo,_repo.https://github.com/mlcommons/inference_results_v4.1,_branch.cm-code-only", + "extra_cache_tags": "inference,results", "env": { "CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_MLPERF_INFERENCE_RESULTS_PATH" } @@ -187,7 +188,10 @@ "tags": "get,generic-python-lib,_package.nltk" }, { - "tags": "get,generic-python-lib,_torch" + "tags": "get,generic-python-lib,_torch_cuda" + }, + { + "tags": 
"get,generic-python-lib,_package.compressed_tensors" } ] }, diff --git a/script/get-ml-model-stable-diffusion/_cm.json b/script/get-ml-model-stable-diffusion/_cm.json index 2e062a080a..5934e5c3e8 100644 --- a/script/get-ml-model-stable-diffusion/_cm.json +++ b/script/get-ml-model-stable-diffusion/_cm.json @@ -106,6 +106,7 @@ "pytorch": { "default": true, "env": { + "CM_ML_MODEL_STARTING_WEIGHTS_FILENAME": "https://github.com/mlcommons/inference/tree/master/text_to_image#download-model", "CM_ML_MODEL_FRAMEWORK": "pytorch" }, "group": "framework" @@ -115,7 +116,6 @@ }, "pytorch,fp32": { "env": { - "CM_ML_MODEL_STARTING_WEIGHTS_FILENAME": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0" }, "required_disk_space": 13000 }, diff --git a/script/get-mlperf-inference-loadgen/_cm.yaml b/script/get-mlperf-inference-loadgen/_cm.yaml index a097a1edde..3ce1f83992 100644 --- a/script/get-mlperf-inference-loadgen/_cm.yaml +++ b/script/get-mlperf-inference-loadgen/_cm.yaml @@ -24,6 +24,7 @@ deps: - CM_GIT_CHECKOUT names: - inference-src-loadgen + - inference-src skip_if_env: CM_MLPERF_INFERENCE_LOADGEN_DOWNLOAD: - 'YES' @@ -150,6 +151,11 @@ variations: CM_MLPERF_INFERENCE_LOADGEN_DOWNLOAD_URL: https://www.dropbox.com/scl/fi/gk5e9kziju5t56umxyzyx/loadgen.zip?rlkey=vsie4xnzml1inpjplm5cg7t54&dl=0 CM_MLPERF_INFERENCE_LOADGEN_VERSION: v4.0 CM_VERIFY_SSL: false + no-compilation-warnings: + env: + '+ CXXFLAGS': + - '-Werror' + - '-Wno-unused-parameter' versions: custom: diff --git a/script/get-mlperf-inference-loadgen/run.sh b/script/get-mlperf-inference-loadgen/run.sh index d9acb121b8..ac61ad329b 100644 --- a/script/get-mlperf-inference-loadgen/run.sh +++ b/script/get-mlperf-inference-loadgen/run.sh @@ -24,15 +24,15 @@ fi cmake \ -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" \ - -DPYTHON_EXECUTABLE:FILEPATH=${CM_PYTHON_BIN_WITH_PATH} -if [ "${?}" != "0" ]; then exit 1; fi + -DPYTHON_EXECUTABLE:FILEPATH="${CM_PYTHON_BIN_WITH_PATH}" -B . +if [ ${?} -ne 0 ]; then exit $?; fi echo "******************************************************" CM_MAKE_CORES=${CM_MAKE_CORES:-${CM_HOST_CPU_TOTAL_CORES}} CM_MAKE_CORES=${CM_MAKE_CORES:-2} -cmake --build . --target install -j ${CM_MAKE_CORES} -if [ "${?}" != "0" ]; then exit 1; fi +cmake --build . --target install -j "${CM_MAKE_CORES}" +if [ ${?} -ne 0 ]; then exit $?; fi # Clean build directory (too large) cd "${CUR_DIR}" @@ -40,17 +40,11 @@ if [[ $CM_MLPERF_INFERENCE_LOADGEN_BUILD_CLEAN == "yes" ]]; then rm -rf build fi -PYTHON_VERSION=`${CM_PYTHON_BIN_WITH_PATH} -V |cut -d' ' -f2` -PYTHON_SHORT_VERSION=${PYTHON_VERSION%.*} -PYTHON_MINOR_VERSION=${PYTHON_SHORT_VERSION#*.} -MLPERF_INFERENCE_PYTHON_SITE_BASE=${INSTALL_DIR}"/python" cd "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" -#CFLAGS="-std=c++14 -O3" ${CM_PYTHON_BIN_WITH_PATH} setup.py bdist_wheel -#${CM_PYTHON_BIN_WITH_PATH} -m pip install --force-reinstall `ls dist/mlperf_loadgen-*cp3${PYTHON_MINOR_VERSION}*.whl` --target="${MLPERF_INFERENCE_PYTHON_SITE_BASE}" ${CM_PYTHON_BIN_WITH_PATH} -m pip install . --target="${MLPERF_INFERENCE_PYTHON_SITE_BASE}" -if [ "${?}" != "0" ]; then exit 1; fi +if [ ${?} -ne 0 ]; then exit $?; fi # Clean the built wheel #find . 
-name 'mlcommons_loadgen*.whl' | xargs rm diff --git a/script/get-mlperf-inference-nvidia-common-code/_cm.json b/script/get-mlperf-inference-nvidia-common-code/_cm.json index 5785f93f80..a0191d9e9e 100644 --- a/script/get-mlperf-inference-nvidia-common-code/_cm.json +++ b/script/get-mlperf-inference-nvidia-common-code/_cm.json @@ -11,7 +11,7 @@ "names": [ "mlperf-inference-results" ], - "tags": "get,mlperf,inference,results,official", + "tags": "get,mlperf,inference,results,official,_code-only", "inherit_variation_tags": true } ], @@ -29,7 +29,8 @@ "uid": "26b78bf3ffdc4926", "variations": { "mlcommons": { - "group": "repo-owner" + "group": "repo-owner", + "default": true }, "custom": { "group": "repo-owner" diff --git a/script/get-mlperf-inference-results/_cm.json b/script/get-mlperf-inference-results/_cm.json index 45d1c2f519..01451d42b8 100644 --- a/script/get-mlperf-inference-results/_cm.json +++ b/script/get-mlperf-inference-results/_cm.json @@ -9,7 +9,7 @@ "CM_GIT_DEPTH": "--depth 1", "CM_GIT_PATCH": "no" }, - "default_version": "v3.1", + "default_version": "v4.0", "prehook_deps": [ { "tags": "get,git,repo", @@ -72,11 +72,6 @@ "env": { "CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v4.1", "CM_GIT_URL": "https://github.com/<<>>/inference_results_v4.1.git" - }, - "adr": { - "inference-results-repo": { - "tags": "_branch.cm-fixes" - } } } }, @@ -112,6 +107,14 @@ "env": { "GITHUB_REPO_OWNER": "GATEOverflow" } + }, + "code-only": { + "group": "repo-branch", + "adr": { + "inference-results-repo": { + "tags": "_branch.cm-code-only" + } + } } } } diff --git a/script/get-mlperf-inference-src/_cm.json b/script/get-mlperf-inference-src/_cm.json index 9bd2f4c3aa..2b62b7f5a7 100644 --- a/script/get-mlperf-inference-src/_cm.json +++ b/script/get-mlperf-inference-src/_cm.json @@ -32,6 +32,7 @@ "CM_MLPERF_INFERENCE_GPTJ_PATH", "CM_MLPERF_INFERENCE_RNNT_PATH", "CM_MLPERF_INFERENCE_SOURCE", + "CM_MLPERF_INFERENCE_SOURCE_VERSION", "CM_MLPERF_INFERENCE_VERSION", "CM_MLPERF_INFERENCE_VISION_PATH", "CM_MLPERF_LAST_RELEASE", diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py index 7ba1b62be3..8f5a160923 100644 --- a/script/get-mlperf-inference-src/customize.py +++ b/script/get-mlperf-inference-src/customize.py @@ -85,7 +85,7 @@ def postprocess(i): state = i['state'] inference_root = env['CM_MLPERF_INFERENCE_SOURCE'] - env['CM_MLPERF_INFERENCE_VISION_PATH'] = os.path.join(inference_root, 'inference', 'vision') + env['CM_MLPERF_INFERENCE_VISION_PATH'] = os.path.join(inference_root, 'vision') env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'] = os.path.join(inference_root, 'vision', 'classification_and_detection') env['CM_MLPERF_INFERENCE_BERT_PATH'] = os.path.join(inference_root, 'language', 'bert') env['CM_MLPERF_INFERENCE_GPTJ_PATH'] = os.path.join(inference_root, 'language', 'gpt-j') @@ -101,6 +101,11 @@ def postprocess(i): env['+PYTHONPATH']=[] env['+PYTHONPATH'].append(os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], 'python')) + if os.path.exists(os.path.join(inference_root, "loadgen", "VERSION.txt")): + with open(os.path.join(inference_root, "loadgen", "VERSION.txt")) as f: + version_info = f.read().strip() + env['CM_MLPERF_INFERENCE_SOURCE_VERSION'] = version_info + if env.get('CM_GET_MLPERF_IMPLEMENTATION_ONLY', '') == "yes": return {'return':0} diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml 
b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml new file mode 100644 index 0000000000..7b24138acb --- /dev/null +++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml @@ -0,0 +1,38 @@ +--- + resnet50: + Offline: + target_qps: 43000.0 + Server: + target_qps: 37000.0 + retinanet: + Offline: + target_qps: 650.0 + Server: + target_qps: 600 + bert-99: + Offline: + target_qps: 4000 + bert-99.9: + Offline: + target_qps: 4000 + 3d-unet-99: + Offline: + target_qps: 2.0 + 3d-unet-99.9: + Offline: + target_qps: 2.0 + gptj-99.9: + Offline: + target_qps: 4 + Server: + target_qps: 3.5 + gptj-99: + Offline: + target_qps: 4 + Server: + target_qps: 3.5 + sdxl: + Offline: + target_qps: 2 + Server: + target_qps: 1 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml new file mode 100644 index 0000000000..cffda0ea0c --- /dev/null +++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -0,0 +1,42 @@ +--- + resnet50: + Offline: + target_qps: 88000.0 + Server: + target_qps: 73000.0 + retinanet: + Offline: + target_qps: 1300.0 + Server: + target_qps: 1200 + bert-99: + Offline: + target_qps: 8000 + Server: + target_qps: 6000 + bert-99.9: + Offline: + target_qps: 3500 + Server: + target_qps: 3000 + 3d-unet-99: + Offline: + target_qps: 8.0 + 3d-unet-99.9: + Offline: + target_qps: 8.0 + gptj-99.9: + Offline: + target_qps: 8 + Server: + target_qps: 7 + gptj-99: + Offline: + target_qps: 8 + Server: + target_qps: 7 + sdxl: + Offline: + target_qps: 2 + Server: + target_qps: 1 diff --git a/script/get-mlperf-inference-sut-configs/customize.py b/script/get-mlperf-inference-sut-configs/customize.py index e4a6a40481..27461d7b24 100644 --- a/script/get-mlperf-inference-sut-configs/customize.py +++ b/script/get-mlperf-inference-sut-configs/customize.py @@ -27,7 +27,7 @@ def postprocess(i): implementation_string = env['CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX'] if env.get('CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX', '') != '' else env.get('CM_MLPERF_IMPLEMENTATION', 'default') run_config = [] - for i in range(1,5): + for i in range(1,6): if env.get(f'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX{i}', '') != '': run_config.append(env.get(f'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX{i}')) @@ -51,11 +51,24 @@ def postprocess(i): if os.path.exists(config_path_default): shutil.copy(config_path_default, config_path) else: - print(f"Config file missing for given hw_name: '{env['CM_HW_NAME']}', implementation: '{implementation_string}', device: '{device}, backend: '{backend}', copying from default") - src_config = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", "default", "config.yaml") - shutil.copy(src_config, config_path) - os.makedirs(os.path.dirname(config_path_default), exist_ok=True) - shutil.copy(src_config, config_path_default) + src_config_full = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", env['CM_HW_NAME'], implementation_string+"-implementation", device+"-device", backend+"-framework", "framework-version-"+backend_version, 
run_config_string + "-config.yaml") + src_config_partial1 = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", env['CM_HW_NAME'], implementation_string+"-implementation", device+"-device", backend+"-framework", "framework-version-"+backend_version, "default-config.yaml") + src_config_partial2 = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", env['CM_HW_NAME'], implementation_string+"-implementation", device+"-device", backend+"-framework", "framework-version-default", "default-config.yaml") + src_config_partial3 = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", env['CM_HW_NAME'], implementation_string+"-implementation", device+"-device", backend+"-framework", "default-config.yaml") + if os.path.exists(src_config_full): + shutil.copy(src_config_full, config_path) + elif os.path.exists(src_config_partial1): + shutil.copy(src_config_partial1, config_path) + elif os.path.exists(src_config_partial2): + shutil.copy(src_config_partial2, config_path) + elif os.path.exists(src_config_partial3): + shutil.copy(src_config_partial3, config_path) + else: + print(f"Config file missing for given hw_name: '{env['CM_HW_NAME']}', implementation: '{implementation_string}', device: '{device}, backend: '{backend}', copying from default") + src_config = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", "default", "config.yaml") + shutil.copy(src_config, config_path) + os.makedirs(os.path.dirname(config_path_default), exist_ok=True) + shutil.copy(src_config, config_path_default) state['CM_SUT_CONFIG'][env['CM_SUT_NAME']] = yaml.load(open(config_path), Loader=yaml.SafeLoader) state['CM_SUT_CONFIG_NAME'] = env['CM_SUT_NAME'] diff --git a/script/get-mlperf-inference-sut-description/_cm.json b/script/get-mlperf-inference-sut-description/_cm.json index e5b8723c49..d350ee2eaa 100644 --- a/script/get-mlperf-inference-sut-description/_cm.json +++ b/script/get-mlperf-inference-sut-description/_cm.json @@ -4,6 +4,9 @@ "automation_uid": "5b4e0237da074764", "cache": false, "category": "MLPerf benchmark support", + "docker": { + "run": false + }, "deps": [ { "tags": "detect,os" diff --git a/script/get-mlperf-inference-utils/mlperf_utils.py b/script/get-mlperf-inference-utils/mlperf_utils.py index 1da27dfae4..e09448caee 100644 --- a/script/get-mlperf-inference-utils/mlperf_utils.py +++ b/script/get-mlperf-inference-utils/mlperf_utils.py @@ -4,7 +4,7 @@ from log_parser import MLPerfLog -def get_result_from_log(version, model, scenario, result_path, mode): +def get_result_from_log(version, model, scenario, result_path, mode, inference_src_version = None): config = checker.Config( version, @@ -20,7 +20,14 @@ def get_result_from_log(version, model, scenario, result_path, mode): valid = {} if mode == "performance": has_power = os.path.exists(os.path.join(result_path, "..", "power")) - result_ = checker.get_performance_metric(config, mlperf_model, result_path, scenario, None, None, has_power) + version_tuple = None + if inference_src_version: + version_tuple = tuple(map(int, inference_src_version.split('.'))) + + if version_tuple and version_tuple >= (4,1,22): + result_ = checker.get_performance_metric(config, mlperf_model, result_path, scenario) + else: + result_ = checker.get_performance_metric(config, mlperf_model, result_path, scenario, None, None, has_power) mlperf_log = MLPerfLog(os.path.join(result_path, "mlperf_log_detail.txt")) if ( "result_validity" not in mlperf_log.get_keys() @@ -133,7 +140,7 @@ def get_accuracy_metric(config, model, path): return is_valid, acc_results, acc_targets, 
acc_limits -def get_result_string(version, model, scenario, result_path, has_power, sub_res, division="open", system_json=None, model_precision="fp32"): +def get_result_string(version, model, scenario, result_path, has_power, sub_res, division="open", system_json=None, model_precision="fp32", inference_src_version = None): config = checker.Config( version, @@ -152,8 +159,14 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res, inferred = False result = {} + version_tuple = None + if inference_src_version: + version_tuple = tuple(map(int, inference_src_version.split('.'))) - performance_result = checker.get_performance_metric(config, mlperf_model, performance_path, scenario, None, None, has_power) + if version_tuple and version_tuple >= (4,1,22): + performance_result = checker.get_performance_metric(config, mlperf_model, performance_path, scenario) + else: + performance_result = checker.get_performance_metric(config, mlperf_model, performance_path, scenario, None, None) if "stream" in scenario.lower(): performance_result_ = performance_result / 1000000 #convert to milliseconds else: diff --git a/script/get-platform-details/README-EXTRA.md b/script/get-platform-details/README-EXTRA.md new file mode 100644 index 0000000000..22b4875e86 --- /dev/null +++ b/script/get-platform-details/README-EXTRA.md @@ -0,0 +1,10 @@ +Please execute the following CM command to obtain the platform details of the System Under Test (SUT): + +``` +cm run script --tags=get,platform-details --platform_details_dir= +``` + + +The generated details will be saved as a text file in the specified directory. If no directory is specified, the generated text file will be saved in the CM cache + +A sample of the generated text file can be found [here](https://github.com/GATEOverflow/mlperf_inference_test_submissions_v5.0/blob/main/open/MLCommons/measurements/gh_action-reference-gpu-pytorch_v2.5.0-cu124/system_info.txt) diff --git a/script/get-platform-details/_cm.json b/script/get-platform-details/_cm.json index 3c6c5b4bd6..b02576ad9e 100644 --- a/script/get-platform-details/_cm.json +++ b/script/get-platform-details/_cm.json @@ -4,17 +4,44 @@ "automation_uid": "5b4e0237da074764", "cache": false, "category": "Platform information", + "input_mapping": { + "out_dir_path": "CM_PLATFORM_DETAILS_DIR_PATH", + "out_file_name": "CM_PLATFORM_DETAILS_FILE_NAME" + }, "deps": [ { "tags": "detect,os" }, { + "skip_if_env": { + "CM_HOST_OS_TYPE": [ + "windows" + ] + }, + "tags": "detect,sudo" + }, + { + "skip_if_env": { + "CM_HOST_OS_TYPE": [ + "windows" + ] + }, "tags": "get,sys-util,generic,_psmisc" }, { + "enable_if_env": { + "CM_HOST_OS_TYPE": [ + "linux" + ] + }, "tags": "get,sys-util,generic,_systemd" }, { + "enable_if_env": { + "CM_HOST_OS_TYPE": [ + "linux" + ] + }, "tags": "get,sys-util,generic,_dmidecode" } ], @@ -23,6 +50,9 @@ "enable_if_env": { "CM_INSTALL_NUMACTL": [ "True" + ], + "CM_HOST_OS_TYPE": [ + "linux" ] }, "tags": "get,sys-util,generic,_numactl" @@ -31,9 +61,15 @@ "enable_if_env": { "CM_INSTALL_CPUPOWER": [ "True" + ], + "CM_HOST_OS_TYPE": [ + "linux" ] }, - "tags": "get,sys-util,generic,_linux-tools" + "tags": "get,sys-util,generic,_linux-tools", + "env": { + "CM_TMP_FAIL_SAFE": "yes" + } } ], "tags": [ diff --git a/script/get-platform-details/customize.py b/script/get-platform-details/customize.py index d7affa2608..c618fa0194 100644 --- a/script/get-platform-details/customize.py +++ b/script/get-platform-details/customize.py @@ -13,17 +13,19 @@ def preprocess(i): os_info = i['os_info'] 
env = i['env'] - if os_info['platform'] == "windows": - return {'return':1, 'error':'get-platform-details script not yet supported in windows!'} - - print(env['CM_HOST_OS_KERNEL_VERSION']) - if not check_installation("numactl",os_info): env['CM_INSTALL_NUMACTL'] = 'True' #if not check_installation("cpupower",os_info): env['CM_INSTALL_CPUPOWER'] = 'True' - + + if env.get('CM_PLATFORM_DETAILS_FILE_PATH', '') == '': + if env.get('CM_PLATFORM_DETAILS_DIR_PATH', '') == '': + env['CM_PLATFORM_DETAILS_DIR_PATH'] = os.getcwd() + if env.get('CM_PLATFORM_DETAILS_FILE_NAME', '') == '': + env['CM_PLATFORM_DETAILS_FILE_NAME'] = "system-info.txt" + env['CM_PLATFORM_DETAILS_FILE_PATH'] = os.path.join(env['CM_PLATFORM_DETAILS_DIR_PATH'], env['CM_PLATFORM_DETAILS_FILE_NAME']) + return {'return':0} diff --git a/script/get-platform-details/run-macos.sh b/script/get-platform-details/run-macos.sh new file mode 100644 index 0000000000..fcde181c03 --- /dev/null +++ b/script/get-platform-details/run-macos.sh @@ -0,0 +1 @@ +echo "WARNING: get-platform-details script is fully supported on linux systems only." diff --git a/script/get-platform-details/run.bat b/script/get-platform-details/run.bat index 73412b64ab..fcde181c03 100644 --- a/script/get-platform-details/run.bat +++ b/script/get-platform-details/run.bat @@ -1 +1 @@ -echo "This CM script not supported for windows yet" +echo "WARNING: get-platform-details script is fully supported on linux systems only." diff --git a/script/get-platform-details/run.sh b/script/get-platform-details/run.sh index 05fdb57de7..cab85ca55d 100644 --- a/script/get-platform-details/run.sh +++ b/script/get-platform-details/run.sh @@ -1,123 +1,138 @@ #!/bin/bash -OUTPUT_FILE="system_info.txt" - -echo "WARNING: sudo permission is needed to some packages for measuring the platform details" - -if [[ ${CM_HOST_OS_FLAVOR} == "macos" ]]; then - echo "WARNING: To be done for the mac os" -else - echo "Platform Details" > $OUTPUT_FILE - echo "" >> $OUTPUT_FILE - echo "------------------------------------------------------------" >> $OUTPUT_FILE - echo "1. uname -a" >> $OUTPUT_FILE - eval "uname -a" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "2. w" >> $OUTPUT_FILE - eval "w" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "3. Username" >> $OUTPUT_FILE - echo "From environment variable \$USER: $USER" >> $OUTPUT_FILE - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "4. ulimit -a" >> $OUTPUT_FILE - eval "ulimit -a" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "5. sysinfo process ancestry" >> $OUTPUT_FILE - eval "pstree" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "6. /proc/cpuinfo" >> $OUTPUT_FILE - eval "cat /proc/cpuinfo" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "7. lscpu" >> $OUTPUT_FILE - eval "lscpu" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "8. numactl --hardware" >> $OUTPUT_FILE - eval "numactl --hardware" >> $OUTPUT_FILE - test $? -eq 0 || exit $? 
- echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "9. /proc/meminfo" >> $OUTPUT_FILE - eval "cat /proc/meminfo" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "10. who -r" >> $OUTPUT_FILE - eval "who -r" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "11. Systemd service manager version" >> $OUTPUT_FILE - eval "systemctl --version | head -n 1" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "12. Services, from systemctl list-unit-files" >> $OUTPUT_FILE - eval "systemctl list-unit-files" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "13. Linux kernel boot-time arguments, from /proc/cmdline" >> $OUTPUT_FILE - eval "cat /proc/cmdline" >> $OUTPUT_FILE +OUTPUT_FILE="$CM_PLATFORM_DETAILS_FILE_PATH" +#set -e +#echo $OUTPUT_FILE +echo "WARNING: sudo permission is needed for some of the below commands" + +echo "Platform Details" > $OUTPUT_FILE +echo "" >> $OUTPUT_FILE +echo "------------------------------------------------------------" >> $OUTPUT_FILE +echo "1. uname -a" >> $OUTPUT_FILE +eval "uname -a" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "2. w" >> $OUTPUT_FILE +eval "w" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "3. Username" >> $OUTPUT_FILE +echo "From environment variable \$USER: $USER" >> $OUTPUT_FILE +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "4. ulimit -a" >> $OUTPUT_FILE +eval "ulimit -a" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "5. sysinfo process ancestry" >> $OUTPUT_FILE +eval "pstree" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "6. /proc/cpuinfo" >> $OUTPUT_FILE +eval "cat /proc/cpuinfo" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "7. lscpu" >> $OUTPUT_FILE +eval "lscpu" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "8. numactl --hardware" >> $OUTPUT_FILE +if [[ ${CM_SUDO_USER} == "yes" ]]; then + echo "${CM_SUDO} numactl --hardware" + eval "${CM_SUDO} numactl --hardware" >> $OUTPUT_FILE test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "14. cpupower frequency-info" >> $OUTPUT_FILE - eval "cpupower frequency-info" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "15. sysctl" >> $OUTPUT_FILE - eval "sudo sysctl -a" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "16. /sys/kernel/mm/transparent_hugepage" >> $OUTPUT_FILE - eval "cat /sys/kernel/mm/transparent_hugepage/enabled" >> $OUTPUT_FILE - test $? -eq 0 || exit $? 
- echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "17. /sys/kernel/mm/transparent_hugepage/khugepaged" >> $OUTPUT_FILE - eval "cat /sys/kernel/mm/transparent_hugepage/khugepaged/defrag" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "18. OS release" >> $OUTPUT_FILE - eval "cat /etc/os-release" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "19. Disk information" >> $OUTPUT_FILE - eval "lsblk" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "20. /sys/devices/virtual/dmi/id" >> $OUTPUT_FILE - eval "ls /sys/devices/virtual/dmi/id" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "21. dmidecode" >> $OUTPUT_FILE - eval "sudo dmidecode" >> $OUTPUT_FILE - test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE - - echo "22. BIOS" >> $OUTPUT_FILE - eval "sudo dmidecode -t bios" >> $OUTPUT_FILE +else + echo "Requires SUDO permission" >> $OUTPUT_FILE +fi +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "9. /proc/meminfo" >> $OUTPUT_FILE +eval "cat /proc/meminfo" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "10. who -r" >> $OUTPUT_FILE +eval "who -r" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "11. Systemd service manager version" >> $OUTPUT_FILE +eval "systemctl --version | head -n 1" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "12. Services, from systemctl list-unit-files" >> $OUTPUT_FILE +eval "systemctl list-unit-files" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "13. Linux kernel boot-time arguments, from /proc/cmdline" >> $OUTPUT_FILE +eval "cat /proc/cmdline" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "14. cpupower frequency-info" >> $OUTPUT_FILE +eval "cpupower frequency-info" >> $OUTPUT_FILE +test $? -eq 0 || echo "FAILED: cpupower frequency-info" >> $OUTPUT_FILE +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "15. sysctl" >> $OUTPUT_FILE +if [[ ${CM_SUDO_USER} == "yes" ]]; then + echo "${CM_SUDO} sysctl -a" + eval "${CM_SUDO} sysctl -a" >> $OUTPUT_FILE test $? -eq 0 || exit $? - echo "------------------------------------------------------------" >> $OUTPUT_FILE +else + echo "Requires SUDO permission" >> $OUTPUT_FILE +fi +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "16. /sys/kernel/mm/transparent_hugepage" >> $OUTPUT_FILE +eval "cat /sys/kernel/mm/transparent_hugepage/enabled" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "17. 
/sys/kernel/mm/transparent_hugepage/khugepaged" >> $OUTPUT_FILE +eval "cat /sys/kernel/mm/transparent_hugepage/khugepaged/defrag" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "18. OS release" >> $OUTPUT_FILE +eval "cat /etc/os-release" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "19. Disk information" >> $OUTPUT_FILE +eval "lsblk" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "20. /sys/devices/virtual/dmi/id" >> $OUTPUT_FILE +eval "ls /sys/devices/virtual/dmi/id" >> $OUTPUT_FILE +test $? -eq 0 || exit $? +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "21. dmidecode" >> $OUTPUT_FILE +if [[ ${CM_SUDO_USER} == "yes" ]]; then + eval "${CM_SUDO} dmidecode" >> $OUTPUT_FILE + test $? -eq 0 || echo "FAILED: dmidecode" >> $OUTPUT_FILE +else + echo "Requires SUDO permission" >> $OUTPUT_FILE +fi +echo "------------------------------------------------------------" >> $OUTPUT_FILE - echo "System information has been saved to $PWD/$OUTPUT_FILE" +echo "22. BIOS" >> $OUTPUT_FILE +if [[ ${CM_SUDO_USER} == "yes" ]]; then + eval "${CM_SUDO} dmidecode -t bios" >> $OUTPUT_FILE + test $? -eq 0 || echo "FAILED: dmidecode -t bios" >> $OUTPUT_FILE +else + echo "Requires SUDO permission" >> $OUTPUT_FILE fi +echo "------------------------------------------------------------" >> $OUTPUT_FILE + +echo "System information has been saved to $OUTPUT_FILE" diff --git a/script/get-sys-utils-cm/_cm.yaml b/script/get-sys-utils-cm/_cm.yaml index 3310c28e8a..4d3e755ed7 100644 --- a/script/get-sys-utils-cm/_cm.yaml +++ b/script/get-sys-utils-cm/_cm.yaml @@ -14,8 +14,6 @@ deps: env: CM_CLEAN_DIRS: bin CM_PACKAGE_WIN_URL: https://zenodo.org/records/13868077/files/cm-artifact-os-windows-32.zip?download=1 - ; https://cKnowledge.org/ai/data/zlib123dllx64-bin.zip - ; https://cKnowledge.org/ai/data/xz-5.2.9-win64.zip CM_SUDO: sudo input_mapping: diff --git a/script/get-sys-utils-min/_cm.yaml b/script/get-sys-utils-min/_cm.yaml index c121a75109..c07f46eb57 100644 --- a/script/get-sys-utils-min/_cm.yaml +++ b/script/get-sys-utils-min/_cm.yaml @@ -8,13 +8,21 @@ cache: true category: Detection or installation of tools and artifacts -deps: [] +deps: + - tags: detect,os + - tags: get,generic,sys-util,_xz + enable_if_env: + CM_HOST_OS_TYPE: + - windows + - tags: get,generic,sys-util,_zlib + enable_if_env: + CM_HOST_OS_TYPE: + - windows env: CM_CLEAN_DIRS: bin + CM_WINDOWS_SYS_UTILS_MIN_INSTALL: yes CM_PACKAGE_WIN_URL: https://zenodo.org/records/13868077/files/cm-artifact-os-windows-32.zip?download=1 - ; https://cKnowledge.org/ai/data/zlib123dllx64-bin.zip - ; https://cKnowledge.org/ai/data/xz-5.2.9-win64.zip CM_SUDO: sudo new_env_keys: diff --git a/script/install-generic-conda-package/_cm.json b/script/install-generic-conda-package/_cm.json index 4cefd679d4..120bf2d86e 100644 --- a/script/install-generic-conda-package/_cm.json +++ b/script/install-generic-conda-package/_cm.json @@ -61,6 +61,12 @@ "CM_CONDA_PKG_NAME": "#" } }, + "source.intel": { + "group": "package-source", + "env": { + "CM_CONDA_PKG_SRC": "https://software.repos.intel.com/python/conda/" + } + }, "source.#": { "group": "package-source", "env": { diff --git a/script/install-github-cli/run.bat b/script/install-github-cli/run.bat new file mode 100644 index 
0000000000..2ec8b97186 --- /dev/null +++ b/script/install-github-cli/run.bat @@ -0,0 +1 @@ +choco install gh diff --git a/script/install-ipex-from-src/_cm.json b/script/install-ipex-from-src/_cm.json index e41efea533..7f2318c45a 100644 --- a/script/install-ipex-from-src/_cm.json +++ b/script/install-ipex-from-src/_cm.json @@ -214,10 +214,10 @@ "version": "1.23.5" }, { - "tags": "get,generic-python-lib,_package.torch,_path.https://download.pytorch.org/whl/nightly/cpu-cxx11-abi/torch-2.1.0.dev20230715%2Bcpu.cxx11.abi-cp39-cp39-linux_x86_64.whl", + "tags": "install,pytorch,from-src,_for-intel-mlperf-inference-v3.1-dlrm-v2", "names": [ - "pip-package", - "pip-torch" + "pytorch", + "torch" ] } ], diff --git a/script/install-pytorch-from-src/_cm.json b/script/install-pytorch-from-src/_cm.json index 9beb7e8c33..95693576de 100644 --- a/script/install-pytorch-from-src/_cm.json +++ b/script/install-pytorch-from-src/_cm.json @@ -63,6 +63,7 @@ "install", "get", "src", + "from-src", "from.src", "pytorch", "src-pytorch" @@ -419,6 +420,22 @@ } } }, + "for-intel-mlperf-inference-v3.1-dlrm-v2": { + "base": [ + "sha.927dc662386af052018212c7d01309a506fc94cd" + ], + "deps": [ + { + "tags": "get,cmake", + "version_min": "3.25.0" + } + ], + "ad": { + "pytorch-src-repo": { + "tags": "_no-recurse-submodules,_full-history" + } + } + }, "cuda": { "deps": [ { diff --git a/script/preprocess-mlperf-inference-submission/_cm.json b/script/preprocess-mlperf-inference-submission/_cm.json index 312ab7c2f1..6e1fade381 100644 --- a/script/preprocess-mlperf-inference-submission/_cm.json +++ b/script/preprocess-mlperf-inference-submission/_cm.json @@ -31,6 +31,7 @@ } ], "input_mapping": { + "input": "CM_MLPERF_INFERENCE_SUBMISSION_DIR", "submission_dir": "CM_MLPERF_INFERENCE_SUBMISSION_DIR", "submitter": "CM_MLPERF_SUBMITTER" }, diff --git a/script/preprocess-mlperf-inference-submission/customize.py b/script/preprocess-mlperf-inference-submission/customize.py index 03bca7cd9b..474e4a42ed 100644 --- a/script/preprocess-mlperf-inference-submission/customize.py +++ b/script/preprocess-mlperf-inference-submission/customize.py @@ -11,17 +11,21 @@ def preprocess(i): submission_dir = env.get("CM_MLPERF_INFERENCE_SUBMISSION_DIR", "") if submission_dir == "": - print("Please set CM_MLPERF_INFERENCE_SUBMISSION_DIR") + print("Please set --env.CM_MLPERF_INFERENCE_SUBMISSION_DIR") return {'return': 1, 'error':'CM_MLPERF_INFERENCE_SUBMISSION_DIR is not specified'} - submitter = env.get("CM_MLPERF_SUBMITTER", "cTuning") - submission_processed = submission_dir + "_processed" + if not os.path.exists(submission_dir): + print("Please set --env.CM_MLPERF_INFERENCE_SUBMISSION_DIR to a valid submission directory") + return {'return': 1, 'error':'CM_MLPERF_INFERENCE_SUBMISSION_DIR is not existing'} + + submission_dir = submission_dir.rstrip(os.path.sep) + submitter = env.get("CM_MLPERF_SUBMITTER", "MLCommons") + submission_processed = f"{submission_dir}_processed" if os.path.exists(submission_processed): + print(f"Cleaning {submission_processed}") shutil.rmtree(submission_processed) - os.system("rm -rf " + submission_dir + "_processed") - CMD = env['CM_PYTHON_BIN'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" env['CM_RUN_CMD'] = CMD @@ -39,5 +43,5 @@ def postprocess(i): shutil.copytree(submission_dir, submission_backup) shutil.rmtree(submission_dir) 
os.rename(submission_processed, submission_dir) - + return {'return':0} diff --git a/script/process-mlperf-accuracy/_cm.json b/script/process-mlperf-accuracy/_cm.json index cd4028a533..2d24cc9261 100644 --- a/script/process-mlperf-accuracy/_cm.json +++ b/script/process-mlperf-accuracy/_cm.json @@ -45,7 +45,7 @@ }, { "tags": "get,generic-python-lib,_package.datasets", - "names": + "names": [ "pip-package", "datasets" @@ -87,55 +87,55 @@ }, { "tags": "download,file,_url.https://raw.githubusercontent.com/mlcommons/inference_results_v4.0/main/closed/Intel/code/gptj-99/ITREX/evaluation.py", - "enable_if_env": { - "CM_MLPERF_IMPLEMENTATION": + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": [ - "intel" + "intel" ] - }, - "force_cache": true, - "extra_cache_tags": "intel,accuracy,file,gptj,mlperf,inference", - "env": { - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_ACCURACY_FILE_WITH_PATH" - } - }, + }, + "force_cache": true, + "extra_cache_tags": "intel,accuracy,file,gptj,mlperf,inference", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_ACCURACY_FILE_WITH_PATH" + } + }, { "tags": "download,file,_url.https://raw.githubusercontent.com/mlcommons/inference_results_v4.0/main/closed/Intel/code/gptj-99/ITREX/dataset.py", - "enable_if_env": { - "CM_MLPERF_IMPLEMENTATION": + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": [ - "intel" + "intel" ] - }, - "force_cache": true, - "extra_cache_tags": "intel,dataset,file,gptj,mlperf,inference", - "env": { - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_FILE_WITH_PATH" - } - }, + }, + "force_cache": true, + "extra_cache_tags": "intel,dataset,file,gptj,mlperf,inference", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_FILE_WITH_PATH" + } + }, { "tags": "download,file,_url.https://raw.githubusercontent.com/mlcommons/inference_results_v4.0/main/closed/Intel/code/gptj-99/ITREX/item.py", - "enable_if_env": { - "CM_MLPERF_IMPLEMENTATION": + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": [ - "intel" + "intel" ] - }, - "force_cache": true, - "extra_cache_tags": "intel,dataset,item,file,gptj,mlperf,inference", - "env": { - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_ITEM_FILE_WITH_PATH" - } - }, + }, + "force_cache": true, + "extra_cache_tags": "intel,dataset,item,file,gptj,mlperf,inference", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_ITEM_FILE_WITH_PATH" + } + }, { "tags": "get,ml-model,gptj,_fp32,_pytorch", - "enable_if_env": { - "CM_MLPERF_IMPLEMENTATION": + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": [ - "intel" + "intel" ] - } - } + } + } ], "env": { "CM_DATASET": "cnndm" @@ -285,7 +285,22 @@ "tags": "get,dataset-aux,squad-vocab" }, { - "tags": "get,generic-python-lib,_torch" + "tags": "get,generic-python-lib,_torch", + "skip_if_env": { + "CM_MLPERF_DEVICE": [ + "cuda", + "gpu" + ] + } + }, + { + "tags": "get,generic-python-lib,_torch_cuda", + "enable_if_env": { + "CM_MLPERF_DEVICE": [ + "cuda", + "gpu" + ] + } }, { "tags": "get,generic-python-lib,_tokenization" @@ -340,16 +355,34 @@ }, "openorca-gsm8k-mbxp": { "deps": [ + { + "tags": "get,generic-python-lib,_package.rouge_score", + "names": + [ + "pip-package", + "rouge-score" + ] + }, { "names": [ "openorca-gsm8k-mbxp-combined" ], + "skip_if_env": { + "CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST": [ + "yes" + ] + }, "tags": "get,dataset-mixtral,openorca-mbxp-gsm8k-combined" }, { "names": [ "mixtral-8x7b-model" ], + "skip_if_env": { + 
"CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST": [ + "yes" + ] + }, "tags": "get,ml-model,mixtral" } ], diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index d6536e13fd..bb5d7a2865 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -87,12 +87,12 @@ def preprocess(i): elif dataset == "openorca-gsm8k-mbxp-combined": accuracy_checker_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "mixtral-8x7b", "evaluate-accuracy.py") - CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + env['CM_ML_MODEL_MIXTRAL_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ - "' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") +" > '" + out_file + "'" + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + env['MIXTRAL_CHECKPOINT_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ + "' --dataset-file '" + env['CM_DATASET_MIXTRAL_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") +" > '" + out_file + "'" elif dataset == "coco2014": - env['+PYTHONPATH'] = [ os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools") ] + env['+PYTHONPATH'] = [ os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools") , os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", "fid") ] extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': @@ -101,7 +101,10 @@ def preprocess(i): if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " else: - extra_options += f" --compliance-images-path {os.path.join(result_dir, 'images')} " + extra_options += f""" --compliance-images-path '{os.path.join(result_dir, "images")}' """ + + if env.get('CM_COCO2014_SAMPLE_ID_PATH','') != '': + extra_options += f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': extra_options += f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " diff --git a/script/pull-git-repo/run.sh b/script/pull-git-repo/run.sh index 66cf8406f0..db8612d56d 100644 --- a/script/pull-git-repo/run.sh +++ b/script/pull-git-repo/run.sh @@ -7,10 +7,11 @@ path=${CM_GIT_CHECKOUT_PATH} echo "cd $path" cd $path -test $? -eq 0 || exit 1 +test $? -eq 0 || exit $? echo ${CM_GIT_PULL_CMD} eval ${CM_GIT_PULL_CMD} -test $? -eq 0 || exit 1 +#don't fail if there are local changes +#test $? -eq 0 || exit $? 
cd $CUR_DIR diff --git a/script/push-mlperf-inference-results-to-github/_cm.json b/script/push-mlperf-inference-results-to-github/_cm.json index 132b590b69..bff6447fb3 100644 --- a/script/push-mlperf-inference-results-to-github/_cm.json +++ b/script/push-mlperf-inference-results-to-github/_cm.json @@ -31,7 +31,7 @@ } ], "default_env": { - "CM_MLPERF_RESULTS_GIT_REPO_URL": "https://github.com/ctuning/mlperf_inference_submissions_v4.0" + "CM_MLPERF_RESULTS_GIT_REPO_URL": "https://github.com/mlcommons/mlperf_inference_submissions_v4.0" }, "input_mapping": { "repo_url": "CM_MLPERF_RESULTS_GIT_REPO_URL", diff --git a/script/push-mlperf-inference-results-to-github/run.bat b/script/push-mlperf-inference-results-to-github/run.bat new file mode 100644 index 0000000000..2052eb5644 --- /dev/null +++ b/script/push-mlperf-inference-results-to-github/run.bat @@ -0,0 +1,31 @@ +@echo off + +REM Check if CM_GIT_REPO_CHECKOUT_PATH is set +if not defined CM_GIT_REPO_CHECKOUT_PATH ( + echo "Error: CM_GIT_REPO_CHECKOUT_PATH is not set." + exit /b 1 +) + +cd /d "%CM_GIT_REPO_CHECKOUT_PATH%" +if %errorlevel% neq 0 ( + echo "Error: Failed to change directory to %CM_GIT_REPO_CHECKOUT_PATH%" + exit /b 1 +) + +git pull +git add * + +REM Check if the CM_MLPERF_INFERENCE_SUBMISSION_DIR variable is set +if defined CM_MLPERF_INFERENCE_SUBMISSION_DIR ( + robocopy "%CM_MLPERF_INFERENCE_SUBMISSION_DIR%" "%CM_GIT_REPO_CHECKOUT_PATH%" /E /COPYALL /DCOPY:DAT + git add * +) + +REM Check if the previous command was successful +if %errorlevel% neq 0 exit /b %errorlevel% + +git commit -a -m "%CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE%" +git push + +REM Check if the previous command was successful +if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/script/push-mlperf-inference-results-to-github/run.sh b/script/push-mlperf-inference-results-to-github/run.sh index ac3a50d9f4..1eb4f663e4 100644 --- a/script/push-mlperf-inference-results-to-github/run.sh +++ b/script/push-mlperf-inference-results-to-github/run.sh @@ -1,10 +1,16 @@ #!/bin/bash -cd "${CM_GIT_CHECKOUT_PATH}" +# Check if CM_GIT_REPO_CHECKOUT_PATH is set +if [ -z "${CM_GIT_REPO_CHECKOUT_PATH}" ]; then + echo "Error: CM_GIT_REPO_CHECKOUT_PATH is not set." + exit 1 +fi + +cd "${CM_GIT_REPO_CHECKOUT_PATH}" git pull git add * if [[ -n ${CM_MLPERF_INFERENCE_SUBMISSION_DIR} ]]; then - rsync -avz "${CM_MLPERF_INFERENCE_SUBMISSION_DIR}/" "${CM_GIT_CHECKOUT_PATH}/" + rsync -avz "${CM_MLPERF_INFERENCE_SUBMISSION_DIR}/" "${CM_GIT_REPO_CHECKOUT_PATH}/" git add * fi test $? -eq 0 || exit $? 
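For context, the new run.bat and the reworked run.sh for push-mlperf-inference-results-to-github above follow the same flow: verify that CM_GIT_REPO_CHECKOUT_PATH is set, pull the results repository, optionally mirror CM_MLPERF_INFERENCE_SUBMISSION_DIR into the checkout, then commit and push. Below is a minimal Python sketch of that flow, for illustration only; the environment variable names come from the diff, while the helper name, the fallback commit message, and the use of `git add -A` in place of `git add *` are assumptions of the sketch rather than part of the change.

```python
#!/usr/bin/env python3
# Illustrative sketch of the results-push flow; not part of the PR.
import os
import subprocess
import sys

def sync_and_push():
    # Same guard as the updated run.sh: refuse to run without a checkout path
    checkout = os.environ.get("CM_GIT_REPO_CHECKOUT_PATH", "")
    if not checkout:
        sys.exit("Error: CM_GIT_REPO_CHECKOUT_PATH is not set.")

    os.chdir(checkout)
    subprocess.check_call(["git", "pull"])
    subprocess.check_call(["git", "add", "-A"])

    # If a submission directory is given, mirror it into the checkout before committing
    submission_dir = os.environ.get("CM_MLPERF_INFERENCE_SUBMISSION_DIR", "")
    if submission_dir:
        subprocess.check_call(["rsync", "-avz", submission_dir + "/", checkout + "/"])
        subprocess.check_call(["git", "add", "-A"])

    # The default message here is an assumption; the scripts pass the variable through as-is
    message = os.environ.get("CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE", "Results from CM run")
    subprocess.check_call(["git", "commit", "-a", "-m", message])
    subprocess.check_call(["git", "push"])

if __name__ == "__main__":
    sync_and_push()
```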
diff --git a/script/run-docker-container/_cm.yaml b/script/run-docker-container/_cm.yaml index d070bcad36..5a3d2f48c5 100644 --- a/script/run-docker-container/_cm.yaml +++ b/script/run-docker-container/_cm.yaml @@ -15,6 +15,7 @@ category: Docker automation default_env: CM_DOCKER_DETACHED_MODE: 'yes' + CM_DOCKER_REUSE_EXISTING_CONTAINER: 'no' input_mapping: all_gpus: CM_DOCKER_ADD_ALL_GPUS @@ -25,6 +26,7 @@ input_mapping: detached: CM_DOCKER_DETACHED_MODE device: CM_DOCKER_ADD_DEVICE docker_image_base: CM_DOCKER_IMAGE_BASE + docker_base_image: CM_DOCKER_IMAGE_BASE keep_detached: CM_KEEP_DETACHED_CONTAINER docker_os: CM_DOCKER_OS docker_os_version: CM_DOCKER_OS_VERSION diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 84114d7af5..00ab63c978 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -59,13 +59,16 @@ def preprocess(i): return {'return':1, 'error':'Docker is either not installed or not started:\n{}'.format(e)} output_split = docker_container.split("\n") - if len(output_split) > 1: #container exists + if len(output_split) > 1 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '')).lower() in [ "1", "true", "yes" ]: #container exists out = output_split[1].split(" ") existing_container_id = out[0] + print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id - else: + if env.get('CM_DOCKER_CONTAINER_ID', '') != '': + del(env['CM_DOCKER_CONTAINER_ID']) #not valid ID + CMD = "docker images -q " + DOCKER_CONTAINER if os_info['platform'] == 'windows': @@ -217,6 +220,8 @@ def postprocess(i): print ('') docker_out = subprocess.check_output(CMD, shell=True).decode("utf-8") + #if docker_out != 0: + # return {'return': docker_out, 'error': 'docker run failed'} lines = docker_out.split("\n") @@ -253,6 +258,8 @@ def postprocess(i): print ('') docker_out = os.system(CMD) + if docker_out != 0: + return {'return': docker_out, 'error': 'docker run failed'} return {'return':0} @@ -300,12 +307,12 @@ def update_docker_info(env): if env.get('CM_DOCKER_IMAGE_NAME', '') != '': docker_image_name = env['CM_DOCKER_IMAGE_NAME'] else: - docker_image_name = 'cm-script-'+env['CM_DOCKER_RUN_SCRIPT_TAGS'].replace(',', '-').replace('_','-') + docker_image_name = 'cm-script-'+env['CM_DOCKER_RUN_SCRIPT_TAGS'].replace(',', '-').replace('_','-').replace('+','plus') env['CM_DOCKER_IMAGE_NAME'] = docker_image_name docker_image_tag_extra = env.get('CM_DOCKER_IMAGE_TAG_EXTRA', '-latest') - docker_image_tag = env.get('CM_DOCKER_IMAGE_TAG', docker_image_base.replace(':','-').replace('_','') + docker_image_tag_extra) + docker_image_tag = env.get('CM_DOCKER_IMAGE_TAG', docker_image_base.replace(':','-').replace('_','').replace("/","-") + docker_image_tag_extra) env['CM_DOCKER_IMAGE_TAG'] = docker_image_tag return diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index cefdf55d40..6afcc35270 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -34,6 +34,7 @@ default_env: CM_MLPERF_IMPLEMENTATION: reference CM_MLPERF_MODEL: resnet50 CM_MLPERF_RUN_STYLE: test + CM_MLPERF_SKIP_SUBMISSION_GENERATION: no input_mapping: api_server: CM_MLPERF_INFERENCE_API_SERVER @@ -43,6 +44,7 @@ input_mapping: category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE clean: CM_MLPERF_CLEAN_ALL compliance: CM_MLPERF_LOADGEN_COMPLIANCE + custom_system_nvidia: CM_CUSTOM_SYSTEM_NVIDIA dashboard_wb_project: 
CM_MLPERF_DASHBOARD_WANDB_PROJECT dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM @@ -55,6 +57,8 @@ input_mapping: execution_mode: CM_MLPERF_RUN_STYLE find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE framework: CM_MLPERF_BACKEND + docker_keep_alive: CM_DOCKER_CONTAINER_KEEP_ALIVE + get_platform_details: CM_GET_PLATFORM_DETAILS gpu_name: CM_NVIDIA_GPU_NAME hw_name: CM_HW_NAME pip_loadgen: CM_MLPERF_INFERENCE_LOADGEN_INSTALL_FROM_PIP @@ -66,6 +70,7 @@ input_mapping: model: CM_MLPERF_MODEL multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY network: CM_NETWORK_LOADGEN + nvidia_system_name: CM_NVIDIA_SYSTEM_NAME offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS output_dir: OUTPUT_BASE_DIR output_summary: MLPERF_INFERENCE_SUBMISSION_SUMMARY @@ -75,6 +80,8 @@ input_mapping: precision: CM_MLPERF_MODEL_PRECISION preprocess_submission: CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR push_to_github: CM_MLPERF_RESULT_PUSH_TO_GITHUB + pull_changes: CM_MLPERF_INFERENCE_PULL_CODE_CHANGES + pull_inference_changes: CM_MLPERF_INFERENCE_PULL_SRC_CHANGES readme: CM_MLPERF_README regenerate_accuracy_file: CM_MLPERF_REGENERATE_ACCURACY_FILE regenerate_files: CM_REGENERATE_MEASURE_FILES @@ -105,6 +112,7 @@ input_mapping: max_test_duration: CM_MLPERF_MAX_DURATION_TEST all_models: CM_MLPERF_ALL_MODELS criteo_day23_raw_data_path: CM_CRITEO_DAY23_RAW_DATA_PATH + use_dataset_from_host: CM_USE_DATASET_FROM_HOST new_state_keys: - app_mlperf_inference_* @@ -140,7 +148,8 @@ deps: - tags: install,pip-package,for-cmind-python,_package.tabulate - tags: get,mlperf,inference,utils -docker: +#We use this script as a command generator to run docker via app-mlperf-inference script +docker_off: mounts: - ${{ INSTALL_DATA_PATH }}:/install_data - ${{ DATA_PATH }}:/data @@ -220,7 +229,6 @@ variations: tags: _full env: CM_MLPERF_SUBMISSION_GENERATION_STYLE: full - CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes' group: submission-generation-style performance-only: @@ -248,6 +256,7 @@ variations: - short env: CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-base + CM_DOCKER_IMAGE_NAME: scc24 adr: coco2014-preprocessed: tags: _size.50,_with-sample-ids @@ -255,6 +264,9 @@ variations: tags: _size.50,_with-sample-ids nvidia-preprocess-data: extra_cache_tags: "scc24-base" + inference-src: + tags: _branch.dev + version: custom deps: - tags: clean,nvidia,scratch,_sdxl,_downloaded-data extra_cache_rm_tags: scc24-main @@ -269,8 +281,12 @@ variations: tags: _size.500,_with-sample-ids nvidia-preprocess-data: extra_cache_tags: "scc24-main" + inference-src: + tags: _branch.dev + version: custom env: CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-main + CM_DOCKER_IMAGE_NAME: scc24 deps: - tags: clean,nvidia,scratch,_sdxl,_downloaded-data extra_cache_rm_tags: scc24-base @@ -373,9 +389,9 @@ variations: post_deps: - names: - submission-generator - enable_if_env: + skip_if_env: CM_MLPERF_SKIP_SUBMISSION_GENERATION: - - 'no' + - 'yes' tags: generate,mlperf,inference,submission versions: diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index e2361f2dee..0eb089abdd 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -14,6 +14,7 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + const = i.get('const', {}) inp = i['input'] state = i['state'] @@ -22,6 +23,12 @@ def preprocess(i): if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": return {'return':0} + if 
env.get('CM_DOCKER_IMAGE_NAME', '') == 'scc24': + if env.get("CM_MLPERF_IMPLEMENTATION", "reference") == "reference": + env['CM_DOCKER_IMAGE_NAME'] = "scc24-reference" + elif "nvidia" in env.get("CM_MLPERF_IMPLEMENTATION", "reference"): + env['CM_DOCKER_IMAGE_NAME'] = "scc24-nvidia" + dump_version_info = env.get('CM_DUMP_VERSION_INFO', True) system_meta = state.get('CM_SUT_META', {}) @@ -194,6 +201,12 @@ def preprocess(i): if k.startswith("docker_"): docker_extra_input[k] = inp[k] inp = {} + if str(docker_dt).lower() in ["yes", "true", "1"]: + env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' # turning it off for the first run and after that we turn it on + env['CM_DOCKER_DETACHED_MODE'] = 'yes' + + if env.get('CM_DOCKER_IMAGE_NAME', '') != '': + docker_extra_input['docker_image_name'] = env['CM_DOCKER_IMAGE_NAME'] else: action = "run" @@ -220,14 +233,16 @@ def preprocess(i): env['CM_MLPERF_LOADGEN_MODE'] = mode env_copy = copy.deepcopy(env) + const_copy = copy.deepcopy(const) print(f"\nRunning loadgen scenario: {scenario} and mode: {mode}") ii = {'action':action, 'automation':'script', 'tags': scenario_tags, 'quiet': 'true', - 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': + 'env': env_copy, 'const': const_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} if action == "docker": for k in docker_extra_input: ii[k] = docker_extra_input[k] + r = cm.access(ii) if r['return'] > 0: return r @@ -242,6 +257,7 @@ def preprocess(i): print(f"\nStop Running loadgen scenario: {scenario} and mode: {mode}") return {'return': 0} # We run commands interactively inside the docker container else: + env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'yes' container_id = env_copy['CM_DOCKER_CONTAINER_ID'] env['CM_DOCKER_CONTAINER_ID'] = container_id if state.get('docker', {}): @@ -252,7 +268,7 @@ def preprocess(i): env['CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] = test env['CM_MLPERF_LOADGEN_MODE'] = "compliance" ii = {'action':action, 'automation':'script', 'tags': scenario_tags, 'quiet': 'true', - 'env': copy.deepcopy(env), 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': + 'env': copy.deepcopy(env), 'const': copy.deepcopy(const), 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} if action == "docker": for k in docker_extra_input: @@ -263,6 +279,11 @@ def preprocess(i): if state.get('docker', {}): del(state['docker']) + if env.get('CM_DOCKER_CONTAINER_ID', '') != '' and str(env.get('CM_DOCKER_CONTAINER_KEEP_ALIVE', '')).lower() not in ["yes", "1", "true"]: + container_id = env['CM_DOCKER_CONTAINER_ID'] + CMD = f"docker kill {container_id}" + docker_out = subprocess.check_output(CMD, shell=True).decode("utf-8") + if state.get("cm-mlperf-inference-results"): #print(state["cm-mlperf-inference-results"]) for sut in state["cm-mlperf-inference-results"]:#only one sut will be there @@ -300,7 +321,7 @@ def get_valid_scenarios(model, category, mlperf_version, mlperf_path): internal_model_name = config[mlperf_version]["model_mapping"].get(model, model) - valid_scenarios = 
config[mlperf_version]["required-scenarios-"+category][internal_model_name] + valid_scenarios = config[mlperf_version]["required-scenarios-"+category.replace(",", "-")][internal_model_name] print("Valid Scenarios for " + model + " in " + category + " category are :" + str(valid_scenarios)) diff --git a/script/run-mlperf-inference-submission-checker/_cm.json b/script/run-mlperf-inference-submission-checker/_cm.json index a8c7d5752d..ab57623799 100644 --- a/script/run-mlperf-inference-submission-checker/_cm.json +++ b/script/run-mlperf-inference-submission-checker/_cm.json @@ -77,7 +77,9 @@ "input": "CM_MLPERF_INFERENCE_SUBMISSION_DIR", "submitter": "CM_MLPERF_SUBMITTER", "src_version": "CM_MLPERF_SUBMISSION_CHECKER_VERSION", - "repository": "CM_MLPERF_RESULTS_GIT_REPO_URL", + "repo_name": "CM_MLPERF_RESULTS_GIT_REPO_NAME", + "repo_owner": "CM_MLPERF_RESULTS_GIT_REPO_OWNER", + "repo_branch": "CM_MLPERF_RESULTS_GIT_REPO_BRANCH", "push_to_github": "CM_MLPERF_RESULT_PUSH_TO_GITHUB", "extra_model_benchmark_map": "CM_MLPERF_EXTRA_MODEL_MAPPING", "power": "CM_MLPERF_POWER", diff --git a/script/run-mlperf-inference-submission-checker/customize.py b/script/run-mlperf-inference-submission-checker/customize.py index 5c863bfca3..c0e943c75e 100644 --- a/script/run-mlperf-inference-submission-checker/customize.py +++ b/script/run-mlperf-inference-submission-checker/customize.py @@ -61,7 +61,17 @@ def preprocess(i): skip_compliance + extra_map + power_check + extra_args x_version = ' --version ' + version[1:] +' ' if version!='' else '' - x_submission_repository = ' --repository ' + env.get('CM_MLPERF_RESULTS_GIT_REPO_URL', f'https://github.com/mlcommons/submissions_inference_results_{version}') + + x_submission_repo_name = '' + x_submission_repo_owner = '' + x_submission_repo_branch = '' + + if env.get('CM_MLPERF_RESULTS_GIT_REPO_NAME', '') != '': + x_submission_repo_name = f""" --repository {env['CM_MLPERF_RESULTS_GIT_REPO_NAME']}""" + if env.get('CM_MLPERF_RESULTS_GIT_REPO_OWNER', '') != '': + x_submission_repo_owner = f""" --repository-owner {env['CM_MLPERF_RESULTS_GIT_REPO_OWNER']}""" + if env.get('CM_MLPERF_RESULTS_GIT_REPO_BRANCH', '') != '': + x_submission_repo_branch = f""" --repository-branch {env['CM_MLPERF_RESULTS_GIT_REPO_BRANCH']}""" report_generator_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", "generate_final_report.py") @@ -69,7 +79,9 @@ def preprocess(i): print(CMD) env['CM_POST_RUN_CMD'] = env['CM_PYTHON_BIN_WITH_PATH'] +' ' + q + report_generator_file + q + ' --input summary.csv ' + \ x_version + \ - x_submission_repository + x_submission_repo_name + \ + x_submission_repo_owner + \ + x_submission_repo_branch return {'return':0} @@ -83,6 +95,9 @@ def postprocess(i): if x!='': env['CM_TAR_OUTFILE']=x + if env.get('CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR', '') != '': + env['CM_TAR_OUTPUT_DIR'] = env['CM_MLPERF_INFERENCE_SUBMISSION_BASE_DIR'] + x=env.get('MLPERF_INFERENCE_SUBMISSION_SUMMARY','') if x!='': for y in ['.csv', '.json', '.xlsx']: diff --git a/script/test-cm-core/src/script/test_docker.py b/script/test-cm-core/src/script/test_docker.py index 0ec418f214..9473997ad5 100644 --- a/script/test-cm-core/src/script/test_docker.py +++ b/script/test-cm-core/src/script/test_docker.py @@ -9,7 +9,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@ck', + 'docker_cm_repo': 'mlcommons@cm4mlops', 'image_name':'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 
'app,image-classification,onnx,python', @@ -26,7 +26,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@ck', + 'docker_cm_repo': 'mlcommons@cm4mlops', 'image_name':'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', diff --git a/script/test-cm-scripts/_cm.yaml b/script/test-cm-scripts/_cm.yaml new file mode 100644 index 0000000000..b8b7a9c791 --- /dev/null +++ b/script/test-cm-scripts/_cm.yaml @@ -0,0 +1,31 @@ +alias: test-cm-scripts +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +tags: +- test +- test-scripts +- cm-test +env: + CM_VAR1: orig +const: + CM_CVAR1: orig +new_env_keys: + - CM_VAR* + - CM_CVAR* + +uid: 6fbe3884575c4e51 +variations: + v1: + env: + CM_VAR1: v1 + v2: + env: + CM_VAR1: v2 + CM_VAR2: v2 + const: + CM_VAR2: constv2 + v1,v2: + env: + CM_VAR1: combv1v2 + CM_VAR2: combv1v2 diff --git a/script/test-cm-scripts/customize.py b/script/test-cm-scripts/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/test-cm-scripts/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/test-cm-scripts/run.bat b/script/test-cm-scripts/run.bat new file mode 100644 index 0000000000..648302ca71 --- /dev/null +++ b/script/test-cm-scripts/run.bat @@ -0,0 +1 @@ +rem native script diff --git a/script/test-cm-scripts/run.sh b/script/test-cm-scripts/run.sh new file mode 100644 index 0000000000..4c23c380ea --- /dev/null +++ b/script/test-cm-scripts/run.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH} + +#To export any variable +#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out + +#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency + +echo "Running: " +echo "${CM_RUN_CMD}" +echo "" + +if [[ ${CM_FAKE_RUN} != "yes" ]]; then + eval "${CM_RUN_CMD}" + test $? 
-eq 0 || exit 1 +fi diff --git a/setup.py b/setup.py index 2ff4112866..fed5bd91d1 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ -# setup.py +# Build a whl file for cm4mlperf-inference + from setuptools import setup from setuptools._distutils.dist import Distribution from setuptools.command.install import install @@ -7,6 +8,7 @@ import importlib.util import platform import os +import shutil # Try to use importlib.metadata for Python 3.8+ try: @@ -22,7 +24,6 @@ PackageNotFoundError = pkg_resources.DistributionNotFound - class CustomInstallCommand(install): def run(self): self.get_sys_platform() @@ -37,13 +38,17 @@ def run(self): def is_package_installed(self, package_name): try: if sys.version_info >= (3, 8): - version(package_name) # Tries to get the version of the package + spec = importlib.util.find_spec(package_name) + module = importlib.util.module_from_spec(spec) + sys.modules[package_name] = module + spec.loader.exec_module(module) else: pkg_resources.get_distribution(package_name) # Fallback for < 3.8 return True except PackageNotFoundError: return False + def install_system_packages(self): # List of packages to install via system package manager packages = [] @@ -72,8 +77,17 @@ def install_system_packages(self): manager, details = self.get_package_manager_details() if manager: if manager == "apt-get": - subprocess.check_call(['sudo', 'apt-get', 'update']) - subprocess.check_call(['sudo', 'apt-get', 'install', '-y'] + packages) + # Check if 'sudo' is available + if shutil.which('sudo'): + subprocess.check_call(['sudo', 'apt-get', 'update']) + subprocess.check_call(['sudo', 'apt-get', 'install', '-y'] + packages) + else: + print("sudo not found, trying without sudo.") + try: + subprocess.check_call(['apt-get', 'update']) + subprocess.check_call(['apt-get', 'install', '-y'] + packages) + except subprocess.CalledProcessError: + print(f"Installation of {packages} without sudo failed. Please install these packages manually to continue!") elif self.system == 'Windows': print(f"Please install the following packages manually: {packages}") @@ -127,11 +141,22 @@ def custom_function(self): def get_sys_platform(self): self.system = platform.system() +# Read long description and version +def read_file(file_name, default=""): + if os.path.isfile(file_name): + with open(file_name, "r") as f: + return f.read().strip() + return default + +long_description = read_file("README.md", "No description available.") +version_ = read_file("VERSION", "0.3.1") + setup( name='cm4mlops', - version='0.1', - long_description='CM automations and scripts for MLOps', - long_description_content_type='text/x-rst', + version=version_, + long_description=long_description, + long_description_content_type='text/markdown', + url="https://github.com/mlcommons/cm4mlops", packages=[], install_requires=[ "setuptools>=60", diff --git a/tests/script/check.py b/tests/script/check.py index 7394406d8f..aba7f7831c 100644 --- a/tests/script/check.py +++ b/tests/script/check.py @@ -12,3 +12,13 @@ def check_list(r, string, found=True): raise Exception('CM search returned an empty list for ' + string) if len(r['list']) > 0 and not found: raise Exception('CM search returned at lease one entry for ' + string) + +def check_key_value(d, key, value, absent_ok=False): + if not d.get(key): + if absent_ok: + return True + else: + raise Exception(f"{key} is missing. Current values are {d}") + elif d[key] != value: + raise Exception(f"{key} is not having the expected value of {value}. 
Current value is {d[key]}") + diff --git a/tests/script/process_tests.py b/tests/script/process_tests.py new file mode 100644 index 0000000000..a9a7d0e555 --- /dev/null +++ b/tests/script/process_tests.py @@ -0,0 +1,38 @@ +import sys +import os +import cmind as cm +import check as checks +import json +import yaml + +files=sys.argv[1:] + +for file in files: + print(file) + if not os.path.isfile(file) or not "script" in file: + continue + if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): + continue + script_path = os.path.dirname(file) + f = open(file) + if file.endswith(".json"): + data = json.load(f) + elif file.endswith(".yaml"): + data = yaml.safe_load(f) + if data.get('uid', '') == '': + continue #not a CM script meta + uid = data['uid'] + + ii = { + 'action':'test', 'automation':'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con' + } + if os.environ.get('DOCKER_CM_REPO', '') != '': + ii['docker_cm_repo'] = os.environ['DOCKER_CM_REPO'] + if os.environ.get('DOCKER_CM_REPO_BRANCH', '') != '': + ii['docker_cm_repo_branch'] = os.environ['DOCKER_CM_REPO_BRANCH'] + if os.environ.get('TEST_INPUT_INDEX', '') != '': + ii['test_input_index'] = os.environ['TEST_INPUT_INDEX'] + print(ii) + r = cm.access(ii) + + checks.check_return(r) diff --git a/tests/script/test_docker.py b/tests/script/test_docker.py index 5a02c932ed..6b95143aae 100644 --- a/tests/script/test_docker.py +++ b/tests/script/test_docker.py @@ -12,7 +12,8 @@ 'image_name':'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', - 'CM_MLOPS_REPO': 'ctuning@mlcommons-ck', + 'CM_MLOPS_REPO': 'mlcommons@cm4mlops', + 'CM_MLOPS_REPO_BRANCH': 'mlperf-inference', 'CM_DOCKER_IMAGE_BASE': 'ubuntu:22.04' }, 'quiet': 'yes' diff --git a/tests/script/test_features.py b/tests/script/test_features.py index d116cbd5bf..0679099321 100644 --- a/tests/script/test_features.py +++ b/tests/script/test_features.py @@ -18,4 +18,10 @@ checks.check_list(r, "_NHWC") r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'get,dataset,preprocessed,imagenet,-_NHWC'}) -checks.check_list(r, "_NHWC", False) +#checks.check_list(r, "-_NHWC", False) + + +r = cm.access({'action':'run', 'automation': 'script', 'tags': 'test-scripts,_v1,_v2'}) +new_env = r['new_env'] +checks.check_key_value(new_env, "CM_VAR1", "combv1v2") +checks.check_key_value(new_env, "CM_VAR2", "constv2") diff --git a/tests/tutorials/test_tutorial_retinanet.py b/tests/tutorials/test_tutorial_retinanet.py index 404b69fe93..9ecb2a3bef 100644 --- a/tests/tutorials/test_tutorial_retinanet.py +++ b/tests/tutorials/test_tutorial_retinanet.py @@ -21,7 +21,7 @@ r = cm.access({'action':'run', 'automation':'script', 'tags': 'install,python-venv', 'version': '3.10.8', 'name': 'mlperf' }) checks.check_return(r) -r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': \ +r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': \ {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community', \ 'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline', \ 'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'}) diff --git a/tests/tutorials/test_tutorial_tvm.py b/tests/tutorials/test_tutorial_tvm.py 
index 930e3622df..0d02b87a55 100644 --- a/tests/tutorials/test_tutorial_tvm.py +++ b/tests/tutorials/test_tutorial_tvm.py @@ -16,7 +16,7 @@ checks.check_return(r) -r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_dashboard', 'adr': \ +r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission', 'adr': \ {'python': {'name': 'mlperf', 'version_min': '3.8'}}, 'submitter': 'Community', \ 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', \ 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) diff --git a/tests/tutorials/test_tutorial_tvm_pip_ge.py b/tests/tutorials/test_tutorial_tvm_pip_ge.py index 0c9a4b9c33..47180fa774 100644 --- a/tests/tutorials/test_tutorial_tvm_pip_ge.py +++ b/tests/tutorials/test_tutorial_tvm_pip_ge.py @@ -13,7 +13,7 @@ 'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': \ +r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': \ {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, \ 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', \ 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) diff --git a/tests/tutorials/test_tutorial_tvm_pip_vm.py b/tests/tutorials/test_tutorial_tvm_pip_vm.py index 81069194d4..b9e47152af 100644 --- a/tests/tutorials/test_tutorial_tvm_pip_vm.py +++ b/tests/tutorials/test_tutorial_tvm_pip_vm.py @@ -15,7 +15,7 @@ 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': \ +r = cm.access({'action':'run', 'automation':'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': \ {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', \ 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', \ 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'})
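As a quick illustration of the new check_key_value helper added to tests/script/check.py and exercised by test_features.py above, here is a self-contained sketch; the sample dictionary and the absent_ok call are hypothetical usage, not taken from the test suite.

```python
# Sketch of the helper's contract: raise unless the key holds the expected value;
# absent_ok tolerates a missing (or falsy) key. Mirrors the function added in this PR.
def check_key_value(d, key, value, absent_ok=False):
    if not d.get(key):
        if absent_ok:
            return True
        raise Exception(f"{key} is missing. Current values are {d}")
    elif d[key] != value:
        raise Exception(f"{key} does not have the expected value of {value}. Current value is {d[key]}")

# Hypothetical usage, modeled on the _v1,_v2 variation test in test_features.py:
new_env = {"CM_VAR1": "combv1v2", "CM_VAR2": "constv2"}
check_key_value(new_env, "CM_VAR1", "combv1v2")                    # passes
check_key_value(new_env, "CM_VAR2", "constv2")                     # passes (const overrides env)
check_key_value(new_env, "CM_VAR3", "anything", absent_ok=True)    # missing key tolerated
```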