diff --git a/.github/actions/handle_docker/action.yml b/.github/actions/handle_docker/action.yml index a0aa99c1442d5b..f79bc06afa9f5c 100644 --- a/.github/actions/handle_docker/action.yml +++ b/.github/actions/handle_docker/action.yml @@ -27,17 +27,17 @@ runs: using: 'composite' steps: - name: Checkout head - uses: actions/checkout@v4 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Checkout base - uses: actions/checkout@v4 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: ref: ${{ github.base_ref || github.event.merge_group.base_ref }} sparse-checkout: ${{ inputs.dockerfiles_root_dir }}/docker_tag path: base - name: Install Python dependencies - uses: py-actions/py-dependency-install@v4 + uses: py-actions/py-dependency-install@30aa0023464ed4b5b116bd9fbdab87acf01a484e # v4.1.0 with: path: "${{ github.action_path }}/requirements.txt" update-setuptools: "false" @@ -45,7 +45,10 @@ runs: - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3.7.1 + with: + driver-opts: | + image=${{ inputs.registry }}/dockerio/moby/buildkit:buildx-stable-1 - name: Handle docker images id: handle_images @@ -62,7 +65,7 @@ runs: --base_tag_file "base/${{ inputs.dockerfiles_root_dir }}/docker_tag" \ --docker_env_changed "${{ fromJSON(inputs.changed_components).docker_env }}" \ --dockerfiles_changed "${{ fromJSON(inputs.changed_components).dockerfiles }}" \ - --docker_builder "${{ steps.buildx.outputs.name}}" \ + --docker_builder "${{ steps.buildx.outputs.name }}" \ --repo "${{ github.repository }}" \ --ref_name "${{ github.ref_name }}" \ $([[ -n $pr ]] && echo "--pr $pr" || echo '-s ${{ github.sha }}') \ diff --git a/.github/actions/setup_python/action.yml b/.github/actions/setup_python/action.yml index 0d9138bc643d2a..96968f55636df9 100644 --- a/.github/actions/setup_python/action.yml +++ 
b/.github/actions/setup_python/action.yml @@ -56,13 +56,13 @@ runs: - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 'ARM64' && steps.check_python.outputs.installed == 'false' ) }} name: Setup Python ${{ inputs.version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ inputs.version }} env: PIP_CACHE_DIR: ${{ inputs.self-hosted-runner == 'true' && inputs.pip-cache-path || '' }} - - if: ${{ inputs.should-setup-pip-paths == 'true' }} + - if: ${{ inputs.should-setup-pip-paths == 'true' && runner.os != 'Windows' }} name: Setup pip variables (cache and install path) shell: bash run: | @@ -71,6 +71,14 @@ runs: echo "PIP_CACHE_DIR=${{ inputs.pip-cache-path }}/${PIP_VER}" >> $GITHUB_ENV echo "PIP_INSTALL_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" >> $GITHUB_ENV + - if: ${{ inputs.should-setup-pip-paths == 'true' && runner.os == 'Windows' }} + name: Setup pip cache path + shell: pwsh + run: | + $pipVersion = python3 -c "import pip; print(pip.__version__)" + Write-Host "Using pip version: $pipVersion" + "PIP_CACHE_DIR=${{ inputs.pip-cache-path }}/$pipVersion" >> $env:GITHUB_ENV + - if: ${{ inputs.show-cache-info == 'true' }} name: Get pip cache info shell: bash diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml index cd111d617ddc1b..ec65d2f1e9e82c 100644 --- a/.github/actions/smart-ci/action.yml +++ b/.github/actions/smart-ci/action.yml @@ -30,7 +30,6 @@ inputs: components_config_schema: description: "Path to the schema file for components configuration" required: false - default: ".github/actions/smart-ci/components_schema.yml" labeler_config: description: "Path to labeler configuration file" required: false @@ -66,7 +65,7 @@ runs: using: "composite" steps: - name: Wait for labeler to finish - uses: lewagon/wait-on-check-action@v1.3.1 + uses: 
lewagon/wait-on-check-action@ccfb013c15c8afb7bf2b7c028fb74dc5a068cccc # v1.3.4 if: ${{ github.event_name == 'pull_request' }} with: ref: ${{ github.event.pull_request.head.sha }} @@ -75,13 +74,13 @@ runs: wait-interval: 10 - name: checkout components file - uses: actions/checkout@v4 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: sparse-checkout: .github/components.yml sparse-checkout-cone-mode: false - name: Install Python dependencies - uses: py-actions/py-dependency-install@v4 + uses: py-actions/py-dependency-install@30aa0023464ed4b5b116bd9fbdab87acf01a484e # v4.1.0 with: path: "${{ github.action_path }}/requirements.txt" update-setuptools: "false" @@ -101,7 +100,7 @@ runs: -f "${{ inputs.ref_name }}" \ -p "${{ inputs.component_pattern }}" \ -c "${{ inputs.components_config }}" \ - -m "${{ inputs.components_config_schema }}" \ + -m "${{ inputs.components_config_schema || env.DEFAULT_CONFIG_SCHEMA }}" \ -l "${{ inputs.labeler_config }}" \ --enable_for_org "${{ inputs.enable_for_org }}" \ --skip-when-only-listed-labels-set "${{ inputs.skip_when_only_listed_labels_set }}" \ @@ -109,3 +108,4 @@ runs: shell: bash env: GITHUB_TOKEN: ${{ inputs.repo_token }} + DEFAULT_CONFIG_SCHEMA: "${{ github.action_path }}/components_schema.yml" diff --git a/.github/dependency_review.yml b/.github/dependency_review.yml index 5636a441501fc8..3240502f4e94b5 100644 --- a/.github/dependency_review.yml +++ b/.github/dependency_review.yml @@ -2,13 +2,10 @@ fail-on-severity: 'low' allow-licenses: - 'BSD-2-Clause' - 'BSD-3-Clause' - - 'BSD-2-Clause AND BSD-3-Clause' - - 'BSD-3-Clause AND BSD-3-Clause-Clear' + - 'BSD-3-Clause-Clear' - 'MIT' - 'Apache-2.0' - - 'Apache-2.0 AND BSD-3-Clause' - 'ISC' - - 'Apache-2.0 AND MIT' - 'BlueOak-1.0.0' - '0BSD' - 'Python-2.0' diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 56faa37d1da67f..3783a7e8d5600a 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 
+1 @@ -pr-26993 \ No newline at end of file +pr-27430 diff --git a/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile b/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile new file mode 100644 index 00000000000000..59239575be329c --- /dev/null +++ b/.github/dockerfiles/ov_build/manylinux2014_x86_64/Dockerfile @@ -0,0 +1,20 @@ +ARG REGISTRY="quay.io" +FROM openvinogithubactions.azurecr.io/quayio/pypa/manylinux2014_x86_64 + +USER root + +# Install build dependencies +ADD install_build_dependencies.sh /install_build_dependencies.sh +RUN chmod +x /install_build_dependencies.sh && /install_build_dependencies.sh + +# Install sscache +ARG SCCACHE_VERSION="v0.7.5" +ENV SCCACHE_HOME="/opt/sccache" \ + SCCACHE_PATH="/opt/sccache/sccache" + +RUN mkdir ${SCCACHE_HOME} && cd ${SCCACHE_HOME} && \ + SCCACHE_ARCHIVE="sccache-${SCCACHE_VERSION}-x86_64-unknown-linux-musl.tar.gz" && \ + curl -SLO https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${SCCACHE_ARCHIVE} && \ + tar -xzf ${SCCACHE_ARCHIVE} --strip-components=1 && rm ${SCCACHE_ARCHIVE} + +ENV PATH="$SCCACHE_HOME:$PATH" diff --git a/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile new file mode 100644 index 00000000000000..2d5bc1c878069a --- /dev/null +++ b/.github/dockerfiles/ov_build/ubuntu_22_04_x64_docker/Dockerfile @@ -0,0 +1,42 @@ +ARG REGISTRY="docker.io" +FROM ${REGISTRY}/library/ubuntu:22.04 + +USER root + +# APT configuration +RUN echo 'Acquire::Retries "10";' > /etc/apt/apt.conf && \ + echo 'APT::Get::Assume-Yes "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::Fix-Broken "true";' >> /etc/apt/apt.conf && \ + echo 'APT::Get::no-install-recommends "true";' >> /etc/apt/apt.conf + +ENV DEBIAN_FRONTEND="noninteractive" \ + TZ="Europe/London" + +RUN apt-get update && \ + apt-get install software-properties-common && \ + add-apt-repository --yes --no-update ppa:git-core/ppa && \ + add-apt-repository 
--yes --no-update ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install \ + curl \ + git \ + gpg-agent \ + tzdata \ + # parallel gzip + pigz \ + python3 \ + python3-pip \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install docker +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \ + gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] \ + https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null + +RUN apt-get update && \ + apt-get install -y docker-ce docker-ce-cli containerd.io + +ENV DOCKER_BUILDKIT=1 \ No newline at end of file diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 6a67e8f6793ec9..e0954871f4b51e 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -53,7 +53,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -98,7 +98,7 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: android_arm64 steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' @@ -116,7 +116,7 @@ jobs: popd - name: Clone vcpkg - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'microsoft/vcpkg' ref: ${{ env.VCPKG_VERSION }} @@ -169,7 +169,7 @@ jobs: run: ${SCCACHE_PATH} --zero-stats - name: Cmake - build - run: cmake --build ${BUILD_DIR} --parallel + run: cmake --build ${BUILD_DIR} --parallel $(nproc) - name: Show ccache stats run: ${SCCACHE_PATH} --show-stats @@ -178,7 +178,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index cab5239b4c45c0..b0b46c662abdbb 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -27,7 +27,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -56,7 +56,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -97,13 +97,13 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: android_x64 steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO GenAI - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino.genai' path: 
${{ env.OPENVINO_GENAI_REPO }} @@ -135,6 +135,7 @@ jobs: -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ -DENABLE_LTO=ON \ -DENABLE_PYTHON=OFF \ + -DENABLE_TESTS=ON \ -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_GENAI_REPO }} \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} @@ -143,7 +144,7 @@ jobs: run: ${SCCACHE_PATH} --zero-stats - name: Cmake - build - run: cmake --build ${BUILD_DIR} --parallel + run: cmake --build ${BUILD_DIR} --parallel $(nproc) - name: Show ccache stats run: ${SCCACHE_PATH} --show-stats @@ -152,7 +153,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 53f3eba9a749bf..8c78375e61769c 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -18,7 +18,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' lfs: 'true' @@ -29,7 +29,7 @@ jobs: packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9 version: 3.0 - - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 id: cp310 with: python-version: '3.10' @@ -41,7 +41,7 @@ jobs: - name: Install python dependencies run: | python3 -m pip install -r docs/requirements.txt - (cd docs/openvino_sphinx_theme && python3 setup.py install) + (cd docs/openvino_sphinx_theme && python3 -m pip install .) 
python3 -m pip install docs/openvino_custom_sphinx_sitemap - name: Download and install doxygen @@ -63,7 +63,7 @@ jobs: - name: Cache documentation id: cache_sphinx_docs - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: build/docs/_build/.doctrees key: sphinx-docs-cache @@ -77,13 +77,13 @@ jobs: echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - name: 'Upload sphinx.log' - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: sphinx_build_log_${{ env.PR_NUMBER }}.log path: build/docs/sphinx.log - name: 'Upload docs html' - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_docs_html_${{ env.PR_NUMBER }}.zip path: build/docs/openvino_docs_html.zip @@ -100,7 +100,7 @@ jobs: - name: 'Upload test results' if: failure() - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_docs_pytest path: build/docs/_artifacts/ diff --git a/.github/workflows/check_pr_commits.yml b/.github/workflows/check_pr_commits.yml index 5710736f322652..f7f66be299876c 100644 --- a/.github/workflows/check_pr_commits.yml +++ b/.github/workflows/check_pr_commits.yml @@ -9,7 +9,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install dependencies run: python3 -m pip install -r ./.github/github_org_control/requirements.txt diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml index 
53a426bfa32b42..3fc69b21374093 100644 --- a/.github/workflows/cleanup_caches.yml +++ b/.github/workflows/cleanup_caches.yml @@ -48,7 +48,7 @@ jobs: steps: - name: Checkout cach action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/cache @@ -70,7 +70,7 @@ jobs: steps: - name: Checkout cach action - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/cache diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 5384ece83fb207..9337fdff4b2905 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -28,7 +28,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 777ba7694d0be5..40f80d330094b4 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -14,7 +14,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' @@ -44,7 +44,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' @@ -62,7 +62,7 @@ jobs: # always provide suggestions even for skipped scripts in ov_shellcheck tagret - name: ShellCheck action if: always() - uses: 
reviewdog/action-shellcheck@ccaafec556ffa154f112bfcb7b9c9574190b7091 # v1.27.0 + uses: reviewdog/action-shellcheck@22f96e34e9185b642c5567cc26d1df952f5c9d10 # v1.28.0 with: level: style reporter: github-pr-review @@ -76,7 +76,7 @@ jobs: runs-on: ubuntu-22.04 if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3aecbe2367da05..6cb0b2c5b6233c 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Setup python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10.10' architecture: 'x64' @@ -32,7 +32,7 @@ jobs: max-size: 50G - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: 'true' @@ -138,6 +138,6 @@ jobs: lcov --capture --directory ${{ github.workspace }}/. 
--output-file coverage.info genhtml coverage.info --output-directory coverage-report - name: Collect coverage - uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673 # v4.5.0 + uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4.6.0 with: verbose: true diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 6a163fb5e50043..5a08ec084dadac 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -25,7 +25,56 @@ env: TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: + + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg' + + Docker: + needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_20_04_x64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: 
'.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + Build: + needs: Docker timeout-minutes: 150 defaults: run: @@ -33,7 +82,9 @@ jobs: runs-on: aks-linux-16-cores-32gb if: ${{ github.repository_owner == 'openvinotoolkit' }} container: - image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_x64 }} + volumes: + - /mount:/mount env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_BUILD_TYPE: 'Release' @@ -45,20 +96,15 @@ jobs: COVERITY_TOOL_DIR: /__w/openvino/openvino/coverity_tool steps: - - name: Install git - run: | - apt-get update - apt-get install --assume-yes --no-install-recommends git ca-certificates - - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' ref: ${{ inputs.openvinoRef }} - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -86,7 +132,7 @@ jobs: # # Build # - + - name: CMake configure - OpenVINO run: | cmake \ @@ -107,13 +153,11 @@ jobs: popd - name: Cmake build - OpenVINO with Coverity - run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-build --dir ${BUILD_DIR}/cov-int cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-build --dir ${BUILD_DIR}/cov-int cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: Pack Artefacts - run: | - pushd ${BUILD_DIR} - tar -cvf - cov-int | pigz > openvino.tgz - popd + run: tar -cvf - cov-int | pigz > openvino.tgz + working-directory: ${{ env.BUILD_DIR }} - 
name: Submit artefacts run: | @@ -144,7 +188,7 @@ jobs: run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-configure -c ${COVERITY_TOOL_DIR}/cov-analysis-linux64-2023.6.2/config/coverity_config.xml -lscc text - name: Upload Coverity build log - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: coverity_logs @@ -152,7 +196,7 @@ jobs: if-no-files-found: 'error' - name: Upload Coverity build archive - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: coverity_archive diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index f4db0a83a5a39f..73426222253adb 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +58,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/dependency_review.yml b/.github/workflows/dependency_review.yml index 3dcd9a367b018c..59a1eaa6e1c26f 100644 --- a/.github/workflows/dependency_review.yml +++ b/.github/workflows/dependency_review.yml @@ -9,7 +9,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Dependency Review uses: actions/dependency-review-action@72eb03d02c7872a771aacd928f3123ac62ad6d3a # v4.3.3 diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index 83770900559bab..ba458da5d3ec1a 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -32,7 +32,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -65,7 +65,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -109,7 +109,7 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -143,7 +143,7 @@ jobs: run: ${SCCACHE_PATH} --zero-stats - name: Cmake build - OpenVINO - run: cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + run: cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: Show sccache stats run: ${SCCACHE_PATH} --show-stats @@ -169,7 +169,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -178,7 +178,7 @@ jobs: - name: 
Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -186,7 +186,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -295,7 +295,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -325,7 +325,7 @@ jobs: timeout-minutes: 25 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/fedora_29.yml b/.github/workflows/fedora_29.yml index 0ec0c409d12e0b..f3b101327f76dc 100644 --- a/.github/workflows/fedora_29.yml +++ b/.github/workflows/fedora_29.yml @@ -5,7 +5,7 @@ on: merge_group: push: branches: - # - master + - master - 'releases/**' concurrency: @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +58,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/files_size.yml b/.github/workflows/files_size.yml index 3733ad48ca49d2..2768e731b6578b 100644 --- a/.github/workflows/files_size.yml +++ b/.github/workflows/files_size.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-22.04 if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: git ls-tree run: git ls-tree -r -t -l --full-name HEAD | sort -n -r -k 4 diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index b8eea4375e7e58..3964f049be2abb 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -91,7 +91,7 @@ jobs: PRODUCT_TYPE: public_linux_${{ inputs.os }}_${{ inputs.arch }}_release steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' @@ -106,7 +106,7 @@ jobs: git rev-parse HEAD - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -168,7 +168,7 @@ jobs: run: ${SCCACHE_PATH} --zero-stats - name: Cmake build - OpenVINO - run: cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + run: cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: Show sccache stats run: ${SCCACHE_PATH} --show-stats @@ -210,7 +210,7 @@ jobs: -DPython3_EXECUTABLE=$python_exec \ -DCPACK_GENERATOR=DEB \ ${BUILD_DIR} - cmake --build ${BUILD_DIR} 
--parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target package + cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} --target package - name: Cmake & Build - OpenVINO Contrib if: ${{ inputs.build-contrib }} @@ -221,7 +221,7 @@ jobs: -DENABLE_WHEEL=OFF \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} - cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: CMake configure, build and install - OpenVINO JS API if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }} @@ -230,7 +230,7 @@ jobs: -DCPACK_GENERATOR=NPM \ -DENABLE_SYSTEM_TBB=OFF \ -DENABLE_WHEEL=OFF - cmake --build ${BUILD_DIR} --parallel + cmake --build ${BUILD_DIR} --parallel $(nproc) cmake --install ${BUILD_DIR} --prefix ${INSTALL_DIR_JS} - name: Build RPM packages @@ -243,13 +243,13 @@ jobs: -DENABLE_WHEEL=OFF \ -DENABLE_TESTS=OFF \ ${BUILD_DIR} - cmake --build ${BUILD_DIR} --parallel --target package --verbose + cmake --build ${BUILD_DIR} --parallel $(nproc) --target package --verbose # # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -258,7 +258,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -266,7 +266,7 @@ jobs: - name: Upload openvino wheels if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: 
openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -274,7 +274,7 @@ jobs: - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -282,7 +282,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_developer_package path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -290,7 +290,7 @@ jobs: - name: Upload openvino RPM packages if: ${{ inputs.build-rpm-packages }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_rpm_packages path: ${{ env.BUILD_DIR }}/*.rpm @@ -298,7 +298,7 @@ jobs: - name: Upload openvino debian packages if: ${{ inputs.build-debian-packages }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -306,7 +306,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -331,7 +331,7 @@ jobs: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz ${{ env.BUILD_DIR }}/deb ${{ env.MANIFEST_PATH }} - ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.BUILD_DIR) || '' }} + ${{ env.STORE_WHEELS 
== 'true' && format('{0}/wheels', env.INSTALL_WHEELS_DIR) || '' }} storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} env: diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index c8e249513a08f0..8a39f348d824c3 100644 --- a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -17,11 +17,15 @@ on: description: 'Target branch for the build' type: string required: true + cmake-options: + description: 'A string of options passed to CMake' + type: string + required: true permissions: read-all env: - PIP_CACHE_PATH: /mount/caches/pip/win + PIP_CACHE_PATH: "C:\\mount\\caches\\pip\\win" PYTHON_VERSION: '3.11' jobs: @@ -33,7 +37,6 @@ jobs: runs-on: ${{ inputs.runner }} env: CMAKE_BUILD_TYPE: ${{ inputs.build-type }} - CMAKE_GENERATOR: 'Ninja Multi-Config' CMAKE_CXX_COMPILER_LAUNCHER: ccache CMAKE_C_COMPILER_LAUNCHER: ccache CCACHE_REMOTE_DIR: C:\\mount\\caches\\ccache\\windows2022_x86_64_${{ inputs.build-type }}\\${{ inputs.target-branch }} @@ -51,13 +54,13 @@ jobs: PRODUCT_TYPE: 'public_windows_vs2019_${{ inputs.build-type }}' steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -155,25 +158,13 @@ jobs: - name: CMake configure run: | - cmake -G "${{ env.CMAKE_GENERATOR }}" ` - -DENABLE_CPPLINT=OFF ` - -DBUILD_SHARED_LIBS=ON ` - -DENABLE_TESTS=ON ` - -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` - -DENABLE_STRICT_DEPENDENCIES=OFF ` - -DENABLE_PYTHON=ON ` - -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` - -DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }} ` - 
-DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` - -DOPENVINO_EXTRA_MODULES="${{ env.OPENVINO_CONTRIB_REPO }}/modules/custom_operations;${{ env.OPENVINO_CONTRIB_REPO }}/modules/java_api" ` - -S ${{ env.OPENVINO_REPO }} ` - -B ${{ env.BUILD_DIR }} + cmake -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ${{ inputs.cmake-options }} - name: Clean ccache stats run: '& ccache --zero-stats' - name: Cmake build - OpenVINO - run: cmake --build ${{ env.BUILD_DIR }} --parallel --verbose + run: cmake --build ${{ env.BUILD_DIR }} --config ${{ env.CMAKE_BUILD_TYPE }} --parallel $ENV:NUMBER_OF_PROCESSORS --verbose - name: Show ccache stats run: '& ccache --show-stats' @@ -210,7 +201,7 @@ jobs: -DCPACK_GENERATOR=NPM ` -DENABLE_SYSTEM_TBB=OFF ` -DENABLE_WHEEL=OFF - cmake --build ${{ env.BUILD_DIR }} --parallel + cmake --build ${{ env.BUILD_DIR }} --parallel $ENV:NUMBER_OF_PROCESSORS cmake --install ${{ env.BUILD_DIR }} --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_DIR_JS }} # @@ -218,21 +209,22 @@ jobs: # - name: Upload openvino package - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.zip if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + if: ${{ inputs.build-type != 'Debug' }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.BUILD_DIR }}/wheels/*.whl if-no-files-found: 'error' - name: Upload openvino tests package - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -240,7 +232,7 
@@ jobs: - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -254,7 +246,9 @@ jobs: artifacts: | ${{ env.BUILD_DIR }}/openvino_package.zip ${{ env.BUILD_DIR }}/openvino_tests.zip - ${{ env.INSTALL_WHEELS_DIR }}/wheels ${{ env.MANIFEST_PATH }} + ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.INSTALL_WHEELS_DIR) || '' }} storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} + env: + STORE_WHEELS: ${{ inputs.build-type != 'Debug' }} diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 6848871df6e81e..0366ec47ff437e 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -71,7 +71,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -89,7 +89,7 @@ jobs: run: python3 -m pip install -r ${INSTALL_TEST_DIR}/functional_test_utils/layer_tests_summary/requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} @@ -109,14 +109,14 @@ jobs: timeout-minutes: 25 - name: Save tests execution time - uses: actions/cache/save@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 if: github.ref_name == 
'master' with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 99c363d04d23a7..8fab17043b7465 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -20,13 +20,23 @@ on: description: 'OS that is used for testing in the form of "ubuntu_20_04"' type: string required: true + build-type: + description: 'OpenVINO build type, e.g., "Release"' + type: string + required: false + default: 'Release' + timeout-minutes: + description: 'Timeout in minutes for the job' + type: number + required: false + default: 35 permissions: read-all jobs: CXX_Unit_Tests: name: C++ unit tests - timeout-minutes: ${{ contains(inputs.runner, 'win') && 50 || 35 }} + timeout-minutes: ${{ inputs.timeout-minutes }} runs-on: ${{ inputs.runner }} container: image: ${{ inputs.image }} @@ -165,7 +175,6 @@ jobs: if: ${{ fromJSON(inputs.affected-components).transformations.test && runner.arch != 'ARM64' }} # Ticket: 126281 run: | ${{ env.SETUPVARS_COMMAND }} - ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml - name: Common test utils tests @@ -208,8 +217,9 @@ jobs: ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml + # Disabled for debug build due to long execution time - name: Template plugin func tests - if: fromJSON(inputs.affected-components).TEMPLATE.test + if: ${{ fromJSON(inputs.affected-components).TEMPLATE.test && 
inputs.build-type != 'debug' }} run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_template_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateFuncTests.xml @@ -231,9 +241,10 @@ jobs: run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_unit_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_unit_tests.xml - + + # Disabled for debug build due to long execution time - name: AutoBatch func tests - if: fromJSON(inputs.affected-components).AUTO_BATCH.test + if: ${{ fromJSON(inputs.affected-components).AUTO_BATCH.test && inputs.build-type != 'debug' }} run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_func_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_func_tests.xml --gtest_filter="*smoke*" @@ -250,14 +261,15 @@ jobs: ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroUnitTests.xml + # Disabled for debug build due to long execution time - name: Hetero func tests - if: ${{ fromJSON(inputs.affected-components).HETERO.test && inputs.os != 'debian_10' }} # Ticket: 153170 + if: ${{ fromJSON(inputs.affected-components).HETERO.test && inputs.os != 'debian_10' && inputs.build-type != 'debug' }} # Ticket: 153170 run: | ${{ env.SETUPVARS_COMMAND }} ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git a/.github/workflows/job_debian_packages.yml b/.github/workflows/job_debian_packages.yml index d7d7bf089b64ec..1aa13efea090ec 100644 
--- a/.github/workflows/job_debian_packages.yml +++ b/.github/workflows/job_debian_packages.yml @@ -50,14 +50,14 @@ jobs: # Install debian packages from previous release apt-get install --no-install-recommends -y gnupg wget ca-certificates lsb-release wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - + gpg --output /etc/apt/trusted.gpg.d/intel.gpg --dearmor GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + # Yields only the number, e.g., "20" ubuntu_version=$(lsb_release -r -s) ubuntu_version=${ubuntu_version%.*} - + echo "deb https://apt.repos.intel.com/openvino/2024 ubuntu$ubuntu_version main" | tee /etc/apt/sources.list.d/intel-openvino-2024.list - + apt-get update -y apt-get install -y openvino fi diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 324e653c57ebab..195abbbd5fb0f9 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -74,7 +74,7 @@ jobs: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input TZ: "Europe/London" # to prevent tzdata from waiting user input - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{ env.PYTHON_VERSION }} @@ -128,7 +128,7 @@ jobs: - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-${{ inputs.test_type }}-${{ inputs.device }} diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 9956a27f234b36..43fa8f2a7f1740 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -64,7 +64,7 @@ jobs: working-directory: 
${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -100,7 +100,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-jax-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 321aa88d614310..c879f0cb6a1efc 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -112,7 +112,7 @@ jobs: python3 -m pytest --backend="CPU" --model_zoo_dir="${MODELS_SHARE_PATH}" ${INSTALL_TEST_DIR}/onnx/tests/tests_python/test_zoo_models.py -v -n auto --forked -k 'not _cuda' --model_zoo_xfail - name: Upload logs from pytest - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: onnx_models_tests_logs diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index 0ceb080d82184d..df50c4f3e2ad3c 100644 --- a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -63,7 +63,7 @@ jobs: popd - name: Fetch ONNX runtime version and skip tests list - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | src/frontends/onnx/tests/ci_utils/onnxruntime @@ -77,7 +77,7 @@ jobs: working-directory: ${{ env.ONNX_RUNTIME_UTILS }} - name: Clone ONNX Runtime - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # 
v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'microsoft/onnxruntime' path: ${{ env.ONNX_RUNTIME_REPO }} @@ -96,7 +96,7 @@ jobs: --config RelWithDebInfo \ --use_openvino CPU \ --build_shared_lib \ - --parallel \ + --parallel $(nproc) \ --skip_tests \ --compile_no_warning_as_error \ --allow_running_as_root \ diff --git a/.github/workflows/job_openvino_js.yml b/.github/workflows/job_openvino_js.yml index e722af78832c12..ecb278fdb54ca3 100644 --- a/.github/workflows/job_openvino_js.yml +++ b/.github/workflows/job_openvino_js.yml @@ -32,7 +32,7 @@ jobs: DISPLAY: ':99' steps: - name: Fetch OpenVINO JS sources - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | src/bindings/js @@ -52,7 +52,7 @@ jobs: - name: Setup Node ${{ env.NODE_VERSION }} if: runner.os != 'Linux' # Node is already installed in the Docker image - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index d63262c665d45c..64be9ef4bbcc44 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -76,7 +76,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -111,10 +111,7 @@ jobs: run: | # To enable pytest parallel features python3 -m pip install pytest-xdist[psutil] - # For torchvision to OpenVINO preprocessing converter - python3 -m pip install -r 
${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt - - # TODO: replace with Python API tests requirements + python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt python3 -m pip install -r ${INSTALL_TEST_DIR}/mo/requirements_dev.txt # @@ -158,6 +155,9 @@ jobs: - name: Install Python Layer tests dependencies run: | + # For torchvision to OpenVINO preprocessing converter + python3 -m pip install -r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt + # layer test requirements python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt @@ -248,7 +248,7 @@ jobs: - name: Clone API snippets if: runner.os != 'macOS' - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: docs/articles_en/assets/snippets path: ${{ env.OPENVINO_REPO }} @@ -267,7 +267,7 @@ jobs: if: ${{ fromJSON(inputs.affected-components).Python_API.test }} run: | python3 -m pip uninstall -y numpy - python3 -m pip install "numpy>=2.0.0,<2.1.0" + python3 -m pip install "numpy>=2.0.0,<2.2.0" python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH @@ -276,7 +276,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 95074dc84f1ff9..b0eba0a278e582 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -24,7 +24,8 @@ on: permissions: read-all env: - PIP_CACHE_PATH: 
/mount/caches/pip/linux + PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux + PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" jobs: PyTorch_Layer_Tests: @@ -83,7 +84,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -94,9 +95,9 @@ jobs: uses: ./openvino/.github/actions/setup_python with: version: ${{ inputs.python-version }} - pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} - should-setup-pip-paths: ${{ runner.os == 'Linux' }} - self-hosted-runner: ${{ runner.os == 'Linux' }} + pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} + should-setup-pip-paths: ${{ runner.os != 'macOS' }} + self-hosted-runner: ${{ runner.os != 'macOS' }} - name: Install OpenVINO Python wheels (Linux and macOS) if: runner.os != 'Windows' @@ -121,7 +122,7 @@ jobs: - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 # due to CVS-152795, parallel run is not possible on Windows - run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit -v --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP32 @@ -130,7 +131,7 @@ jobs: - name: PyTorch torch.export Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287 run: | - python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit_torch_export --junitxml=${{ env.INSTALL_TEST_DIR 
}}/TEST-pytorch.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit_torch_export -v --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch_export.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP32 @@ -140,14 +141,14 @@ jobs: - name: PyTorch torch.compile TORCHFX Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.os != 'macOS' && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287 run: | - python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit_fx_backend --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit_fx_backend -v --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch_compile.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP32 PYTORCH_TRACING_MODE: TORCHFX - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python-pytorch-layers diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index a77c1318f3a0c8..d52b819981d821 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -22,7 +22,7 @@ permissions: read-all jobs: PyTorch_Models_Tests: name: PyTorch Models tests - timeout-minutes: ${{ inputs.model_scope == 'precommit' && 35 || 400 }} + timeout-minutes: ${{ inputs.model_scope == 'precommit' && 45 || 400 }} runs-on: ${{ inputs.runner }} container: ${{ fromJSON(inputs.container) }} defaults: @@ -77,7 +77,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | 
.github/actions/setup_python/action.yml @@ -105,8 +105,7 @@ jobs: python3 -m pip install ./openvino_tokenizers-* working-directory: ${{ env.INSTALL_WHEELS_DIR }} - - name: Install PyTorch tests requirements for precommit - if: ${{ inputs.model_scope == 'precommit' }} + - name: Install PyTorch tests requirements run: | python3 -m pip install -r ${INSTALL_TEST_DIR}/requirements_pytorch env: @@ -116,7 +115,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly_scope1' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_${{ inputs.model_scope }}_tests.html --self-contained-html -v -n 2 -k "TestTimmConvertModel or TestTorchHubConvertModel or TestEdsrConvertModel" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_${{ inputs.model_scope }}_tests.html --self-contained-html -v -n 4 -k "TestTimmConvertModel or TestTorchHubConvertModel or TestEdsrConvertModel" env: TYPE: ${{ inputs.model_scope == 'precommit' && 'precommit' || 'nightly' }} TEST_DEVICE: CPU @@ -137,7 +136,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 2 + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -vvv -s --tb=short -n 2 env: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False @@ -160,6 +159,17 @@ jobs: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False + - name: TorchFX GPTQ Pattern Test + if: ${{ inputs.model_scope == 'precommit' }} + # install torch 
2.3.1 as newer is not yet supported by openvino backend + run: | + export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH + python3 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cpu + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_gptq_torchfx_transformations.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_gptqpattern_tests.html --self-contained-html -v --tb=short + env: + TEST_DEVICE: CPU + USE_SYSTEM_CACHE: False + - name: Reformat unsupported ops file if: ${{ inputs.model_scope != 'precommit' && !cancelled()}} run: | @@ -171,7 +181,7 @@ jobs: df -h - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-torch-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index 7cde4e6fd18eae..e144aa0cfb95aa 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -68,7 +68,7 @@ jobs: - name: Fetch setup_python action # Python is already installed on Ubuntu within Dockerfile if: runner.os != 'Linux' - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index ae6e91a00d1497..29afb466d69a42 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -24,7 +24,8 @@ on: permissions: read-all env: - PIP_CACHE_PATH: /mount/caches/pip/linux + PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux + PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" 
jobs: TensorFlow_Layer_Tests: @@ -83,7 +84,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -94,9 +95,9 @@ jobs: uses: ./openvino/.github/actions/setup_python with: version: ${{ inputs.python-version }} - pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} - should-setup-pip-paths: ${{ runner.os == 'Linux' }} - self-hosted-runner: ${{ runner.os == 'Linux' }} + pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} + should-setup-pip-paths: ${{ runner.os != 'macOS' }} + self-hosted-runner: ${{ runner.os != 'macOS' }} - name: Install OpenVINO Python wheels (Linux and macOS) if: runner.os != 'Windows' @@ -150,7 +151,7 @@ jobs: TEST_PRECISION: FP16 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python-tf-layers diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index db34ec7b793551..5321beb8703de1 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -69,7 +69,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -107,7 +107,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: 
actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-tensorflow-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 238dbfec3a34eb..4b84bee25c78f4 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -20,12 +20,16 @@ on: description: 'Components that are affected by changes in the commit defined by the Smart CI Action' type: string required: true + python-version: + description: 'Python version to setup. E.g., "3.11"' + type: string + required: true permissions: read-all env: - PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' + PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux + PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: @@ -52,7 +56,7 @@ jobs: echo "EXTENSION_BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python @@ -63,16 +67,16 @@ jobs: if: runner.os == 'macOS' run: brew install pigz - - name: Setup Python ${{ env.PYTHON_VERSION }} + - name: Setup Python ${{ inputs.python-version }} uses: ./.github/actions/setup_python with: - version: ${{ env.PYTHON_VERSION }} - pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} - should-setup-pip-paths: ${{ runner.os == 'Linux' }} - self-hosted-runner: ${{ runner.os == 'Linux' }} + version: ${{ inputs.python-version }} + pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} + should-setup-pip-paths: ${{ runner.os != 'macOS' }} + self-hosted-runner: ${{ runner.os != 'macOS' }} - name: Clone OpenVINO Tokenizers - uses: 
actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_tokenizers' path: ${{ env.OPENVINO_TOKENIZERS_REPO }} @@ -133,7 +137,7 @@ jobs: - name: Upload openvino tokenizers wheel if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tokenizers_wheel path: ${{ env.EXTENSION_BUILD_DIR }}/*.whl diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 4c631b673f8cc2..00f3a321e0dd1f 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -26,7 +26,7 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Checkout Labeller Script - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github' diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 0345d5259e8182..10de6867c7d0e2 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -5,7 +5,7 @@ on: merge_group: push: branches: - # - master + - master - 'releases/**' concurrency: @@ -28,7 +28,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -62,7 +62,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ 
-146,6 +146,7 @@ jobs: shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 7b5467b01ad73e..27f54da6ecdc60 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -29,7 +29,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -63,7 +63,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -109,13 +109,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} @@ -169,7 +169,7 @@ jobs: - name: Cmake build - CC COLLECT run: | cmake --build ${BUILD_DIR} --parallel 8 --config ${{ env.CMAKE_BUILD_TYPE }} - cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib 
+ cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib - name: Show sccache stats run: ${SCCACHE_PATH} --show-stats @@ -182,7 +182,7 @@ jobs: - name: Build C++ samples - OpenVINO build tree run: | cmake -G "${{ env.CMAKE_GENERATOR }}" -DOpenVINO_DIR=${BUILD_DIR} -S ${INSTALL_DIR}/samples/cpp -B ${BUILD_DIR}/cpp_samples - cmake --build ${BUILD_DIR}/cpp_samples --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device + cmake --build ${BUILD_DIR}/cpp_samples --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device - name: Build C samples - OpenVINO install tree run: ${INSTALL_DIR}/samples/c/build_samples.sh -i ${INSTALL_DIR} -b ${BUILD_DIR}/c_samples @@ -223,7 +223,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -232,7 +232,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -240,7 +240,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.tar.gz @@ -248,7 +248,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: 
openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -282,13 +282,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index f67b011b0caed5..85b0db8c36294e 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -28,7 +28,7 @@ jobs: target_branch: ${{ steps.set_target_branch.outputs.target_branch }} steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -63,7 +63,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -102,7 +102,7 @@ jobs: if: ${{ github.event_name != 'merge_group' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' @@ -213,13 +213,13 @@ jobs: source ${OPENVINO_BUILD_DIR}/dependencies/deactivate_conanbuild.sh - name: Cmake - Build - run: cmake --build ${OPENVINO_BUILD_DIR} --parallel + run: cmake --build ${OPENVINO_BUILD_DIR} --parallel $(nproc) - name: Show ccache stats run: ccache --show-stats - name: Cmake - Install - 
run: cmake --build ${OPENVINO_BUILD_DIR} --parallel --target install + run: cmake --build ${OPENVINO_BUILD_DIR} --parallel $(nproc) --target install - name: Build OpenVINO C++ samples run: | diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index b23e67a0f2b30e..4bb597d83fadc8 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -1,10 +1,9 @@ -name: Linux Sanitizers (Ubuntu 20.04, Python 3.11) +name: Linux Sanitizers (Ubuntu 20.04, Python 3.9) on: schedule: # run daily at 00:00 - cron: '0 0 * * *' workflow_dispatch: - # pull_request: concurrency: # github.ref is not unique in post-commit @@ -14,22 +13,69 @@ concurrency: permissions: read-all env: - PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' + + Docker: + needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ 
steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_x64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + Build: + needs: [Smart_CI, Docker] timeout-minutes: 500 defaults: run: shell: bash - runs-on: aks-linux-16-cores-32gb + runs-on: aks-linux-16-cores-64gb if: ${{ github.repository_owner == 'openvinotoolkit' }} container: - image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }} volumes: - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING strategy: max-parallel: 3 fail-fast: false @@ -39,10 +85,9 @@ jobs: SANITIZER_CMAKE_OPTION: '-DENABLE_SANITIZER=ON' - SANITIZER: 'UndefinedBehavior' SANITIZER_CMAKE_OPTION: '-DENABLE_UB_SANITIZER=ON' -# - SANITIZER: 'Thread' # Problems with protobuf -# SANITIZER_CMAKE_OPTION: '-DENABLE_THREAD_SANITIZER=ON' + - SANITIZER: 'Thread' + SANITIZER_CMAKE_OPTION: '-DENABLE_THREAD_SANITIZER=ON' env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_BUILD_TYPE: 'RelWithDebInfo' CMAKE_GENERATOR: 'Ninja' GITHUB_WORKSPACE: '/__w/openvino/openvino' @@ -51,59 +96,33 @@ jobs: INSTALL_DIR: /__w/openvino/openvino/openvino_install INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install BUILD_DIR: /__w/openvino/openvino/openvino_build - LSAN_IGNORE: /__w/openvino/openvino/openvino/tests/lsan/suppressions.txt - ASAN_IGNORE: /__w/openvino/openvino/openvino/tests/asan/suppressions.supp - CXX: clang++ - CC: clang + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_ERROR_LOG: 
/__w/openvino/sccache_log.txt + SCCACHE_LOG: warn + SCCACHE_AZURE_KEY_PREFIX: sanitizers_lin_${{ matrix.SANITIZER }}_master + SCCACHE_CACHE_SIZE: 50G steps: - - name: Set apt retries - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - - name: Install git - run: | - apt-get update - apt-get install --assume-yes --no-install-recommends git ca-certificates - - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: ${{ env.OPENVINO_REPO }} submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} submodules: 'true' ref: ${{ env.TARGET_BRANCH }} - # - # Print system info - # - - name: System info uses: ./openvino/.github/actions/system_info - - # - # Dependencies - # - - - name: Install build dependencies - run: | - bash ${OPENVINO_REPO}/install_build_dependencies.sh - apt --assume-yes install clang lld - - - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ env.PYTHON_VERSION }} - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - show-cache-info: 'true' - + - name: Install python dependencies run: | # For Python API: build and wheel packaging @@ -120,17 +139,15 @@ jobs: # For running Paddle frontend unit tests python3 -m pip install -r ${OPENVINO_REPO}/src/frontends/paddle/tests/requirements.txt - + # # Build # - + - name: Clean sccache stats + run: ${SCCACHE_PATH} --zero-stats + - name: CMake configure - OpenVINO run: | - export ASAN_OPTIONS=halt_on_error=0:suppressions=${ASAN_IGNORE} - export LSAN_OPTIONS=suppressions=${LSAN_IGNORE}:NEOReadDebugKeys=1:DisableDeepBind=1 - 
export CC=clang - export CXX=clang++ cmake \ -G "${{ env.CMAKE_GENERATOR }}" \ -DENABLE_CPPLINT=OFF \ @@ -147,24 +164,26 @@ jobs: -DENABLE_OV_PYTORCH_FRONTEND=ON \ -DENABLE_OV_JAX_FRONTEND=ON \ -DENABLE_OV_ONNX_FRONTEND=ON \ + -DENABLE_INTEL_NPU=OFF \ -DENABLE_ONEDNN_FOR_GPU=OFF \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -DENABLE_WHEEL=OFF \ ${{ matrix.SANITIZER_CMAKE_OPTION }} \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} - name: Cmake build - OpenVINO run: | - export ASAN_OPTIONS=halt_on_error=0:suppressions=${ASAN_IGNORE} - export LSAN_OPTIONS=suppressions=${LSAN_IGNORE}:NEOReadDebugKeys=1:DisableDeepBind=1 - cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} - + cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} + + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats + - name: Cmake install - OpenVINO run: | cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} - + - name: Remove unused files to free space run: rm -rf ${BUILD_DIR}/* @@ -185,10 +204,17 @@ jobs: # # Upload build artifacts # + - name: Upload sccache log + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: sccache_log_${{ matrix.SANITIZER }} + path: ${{ env.SCCACHE_ERROR_LOG }} + if-no-files-found: 'error' - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -196,7 
+222,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -205,11 +231,11 @@ jobs: CXX_Unit_Tests: name: C++ unit tests if: ${{ github.repository_owner == 'openvinotoolkit' }} - needs: Build - timeout-minutes: 100 - runs-on: 'aks-linux-16-cores-32gb' + needs: [Docker, Build] + timeout-minutes: 120 + runs-on: aks-linux-16-cores-32gb container: - image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }} defaults: run: shell: bash @@ -220,18 +246,18 @@ jobs: include: - SANITIZER: 'AddressAndLeak' - SANITIZER: 'UndefinedBehavior' -# - SANITIZER: 'Thread' # Problems with protobuf at the Build stage + - SANITIZER: 'Thread' env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input - INSTALL_DIR: ${{ github.workspace }}/install - INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + OPENVINO_REPO: /__w/openvino/openvino/openvino + INSTALL_DIR: /__w/openvino/openvino/install + INSTALL_TEST_DIR: /__w/openvino/openvino/install/tests + BUILD_DIR: /__w/openvino/openvino/openvino_build TBB_ENABLE_SANITIZERS: 1 - CC: clang - CXX: clang++ + ASAN_OPTIONS: halt_on_error=0:suppressions=/__w/openvino/openvino/openvino/tests/sanitizers/asan/suppressions.supp + LSAN_OPTIONS: suppressions=/__w/openvino/openvino/openvino/tests/sanitizers/lsan/suppressions.txt:NEOReadDebugKeys=1:DisableDeepBind=1 + TSAN_OPTIONS: suppressions=/__w/openvino/openvino/openvino/tests/sanitizers/tsan/suppressions.txt + steps: - - name: Set apt retries - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: 
actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: @@ -244,16 +270,6 @@ jobs: name: ${{ format('openvino_tests_{0}', matrix.SANITIZER) }} path: ${{ env.INSTALL_TEST_DIR }} - # Needed as ${{ github.workspace }} is not working correctly when using Docker - - name: Setup Variables - continue-on-error: true - run: | - echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" - echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" - - echo "ASAN_OPTIONS=halt_on_error=0:suppressions=$GITHUB_WORKSPACE/openvino/tests/asan/suppressions.supp" >> "$GITHUB_ENV" - echo "LSAN_OPTIONS=suppressions=$GITHUB_WORKSPACE/openvino/tests/lsan/suppressions.txt:NEOReadDebugKeys=1:DisableDeepBind=1" >> "$GITHUB_ENV" - - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR @@ -263,77 +279,71 @@ jobs: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - - name: Install dependencies (Linux) - run: | - $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -c=gpu -y - apt update && apt --assume-yes install clang lld - - name: Fetch Sanitizer Suppression Lists - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | - tests/lsan/suppressions.txt - tests/asan/suppressions.supp + tests/sanitizers/lsan/suppressions.txt + tests/sanitizers/asan/suppressions.supp + tests/sanitizers/tsan/suppressions.txt sparse-checkout-cone-mode: false - path: 'openvino' + path: ${{ env.OPENVINO_REPO }} # # Tests # - name: OpenVINO Core Unit Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVCoreUT.xml - name: OpenVINO Inference Functional Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh - 
${INSTALL_TEST_DIR}/ov_inference_functional_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-InferenceFunc.xml - name: OpenVINO Inference Unit Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_inference_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-InferenceUnit.xml - name: Low Precision Transformations Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh - ${INSTALL_TEST_DIR}/ov_lp_transformations_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-LpTransformations.xml - name: OpenVINO Conditional compilation tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_conditional_compilation_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ConditionalCompilation.xml - name: IR frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_ir_frontend_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-IRFrontend.xml - name: PaddlePaddle frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/paddle_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-PaddleTests.xml - name: ONNX frontend tests - if: always() + if: ${{ !cancelled() && matrix.SANITIZER != 'Thread' }} # Ticket: 155291 run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_onnx_frontend_tests --gtest_print_time=1 \ @@ -341,14 +351,14 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ONNXFrontend.xml - name: TensorFlow Common frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_tensorflow_common_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TensorFlowCommonFrontend.xml - name: TensorFlow 
frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh @@ -371,56 +381,56 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-Transformations.xml - name: Common test utils tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_util_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CommonUtilTests.xml - name: Snippets func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_snippets_func_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SnippetsFuncTests.xml - name: CPU plugin unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CPUUnitTests.xml - name: ov_subgraphs_dumper_tests tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_subgraphs_dumper_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_subgraphs_dumper_tests.xml - name: Template OpImpl tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_op_conformance_tests --gtest_print_time=1 --device=TEMPLATE --gtest_filter=*OpImpl*\ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml - name: AUTO unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_unit_tests.xml - name: AUTO func Tests - if: always() + if: ${{ 'false' }} # Issue 155210 run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml - name: Template plugin func tests - if: 
always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_template_func_tests --gtest_print_time=1 \ @@ -428,32 +438,32 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateFuncTests.xml - name: OpenVINO C API tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_capi_test --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpenVINOCAPITests.xml - name: AutoBatch unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_batch_unit_tests --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_batch_unit_tests.xml - name: AutoBatch func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_batch_func_tests --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_batch_func_tests.xml --gtest_filter="*smoke*" - name: Proxy Plugin func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVProxyTests.xml - name: Hetero unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroUnitTests.xml @@ -465,7 +475,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-cpp_${{ matrix.SANITIZER }} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 
6e3f344c6dd944..c587c5ad7323b3 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -42,7 +42,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -82,13 +82,13 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -159,7 +159,7 @@ jobs: -B ${{ env.BUILD_DIR }} - name: Cmake build - OpenVINO - run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + run: cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: Show ccache stats run: ccache --show-stats @@ -186,7 +186,7 @@ jobs: -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_CONTRIB_REPO }}/modules/custom_operations \ -S ${{ env.OPENVINO_REPO }} \ -B ${{ env.BUILD_DIR }} - cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: CMake configure, build and install - OpenVINO JS API if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API @@ -196,7 +196,7 @@ jobs: -S ${{ env.OPENVINO_REPO }} \ -B ${{ env.BUILD_DIR }} - cmake --build ${{ env.BUILD_DIR }} --parallel + cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR_JS }} 
-P ${{ env.BUILD_DIR }}/cmake_install.cmake # @@ -205,14 +205,14 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -220,7 +220,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -228,7 +228,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -258,6 +258,7 @@ jobs: runner: 'macos-13' shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 16658318de20d8..0708a844fe6b8b 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -42,7 +42,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -82,13 +82,13 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' @@ -159,7 +159,7 @@ jobs: -B ${{ env.BUILD_DIR }} - name: Cmake build - OpenVINO - run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + run: cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: Show ccache stats run: ccache --show-stats @@ -187,7 +187,7 @@ jobs: -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_CONTRIB_REPO }}/modules/custom_operations \ -S ${{ env.OPENVINO_REPO }} \ -B ${{ env.BUILD_DIR }} - cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} - name: CMake configure, build and install - OpenVINO JS API if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API @@ -197,7 +197,7 @@ jobs: -S ${{ env.OPENVINO_REPO }} \ -B ${{ env.BUILD_DIR }} - cmake --build ${{ env.BUILD_DIR }} --parallel + cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR_JS }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake # @@ -206,14 +206,14 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: 
openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -221,7 +221,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -229,7 +229,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -258,6 +258,7 @@ jobs: runner: 'macos-13-xlarge' shell: bash affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS CXX_Unit_Tests: diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml new file mode 100644 index 00000000000000..ed375fb868459f --- /dev/null +++ b/.github/workflows/manylinux_2014.yml @@ -0,0 +1,191 @@ +name: Manylinux 2014 +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-manylinux-2014 + cancel-in-progress: true + +permissions: read-all + +env: + PIP_CACHE_PATH: /mount/caches/pip/linux + +jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ 
steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg' + + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + Docker: + needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_x64_docker + ov_build/manylinux2014_x86_64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + + Build: + needs: [Docker] + timeout-minutes: 120 + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores-32gb-manylinux + if: ${{ github.repository_owner == 'openvinotoolkit' }} + container: + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64_docker }} + volumes: + - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING 
-e DOCKER_CONFIG -v ${{ github.workspace }}:${{ github.workspace }} + env: + CMAKE_BUILD_TYPE: 'Release' + OPENVINO_REPO: ${{ github.workspace }}/src + INSTALL_DIR: ${{ github.workspace }}/install/openvino + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + BUILD_DIR: ${{ github.workspace }}/build + DOCKER_CONFIG: "/mount/.docker" + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_CACHE_SIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: manylinux_2014 + + steps: + - name: Clone OpenVINO + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + path: ${{ env.OPENVINO_REPO }} + submodules: 'true' + + - name: System info + uses: ./src/.github/actions/system_info + + - name: Create docker build cache + run: | + docker volume create ov_build_cache + + - name: Build OpenVINO + run: | + docker run --rm \ + -v ${{ env.OPENVINO_REPO }}:/work/src \ + -v ov_build_cache:/work/build \ + -v ${{ env.INSTALL_DIR }}:/work/install \ + -e SCCACHE_AZURE_BLOB_CONTAINER \ + -e SCCACHE_AZURE_CONNECTION_STRING \ + -e SCCACHE_SERVER_PORT \ + -e SCCACHE_IGNORE_SERVER_IO_ERROR \ + -e SCCACHE_CACHE_SIZE \ + -e SCCACHE_AZURE_KEY_PREFIX \ + -e CMAKE_CXX_COMPILER_LAUNCHER \ + -e CMAKE_C_COMPILER_LAUNCHER \ + -w /work/src \ + ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ + /bin/bash -c " + cmake -DENABLE_CPPLINT=OFF -DENABLE_NCC_STYLE=OFF -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF -S /work/src -B /work/build && + cmake --build /work/build --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} && + cmake --install /work/build --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/install + " + + - name: Pack Artifacts + run: mkdir -p ${{ env.BUILD_DIR }} && tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz + working-directory: ${{ env.INSTALL_DIR }} + + - name: Build Python API(Python 3.9-3.13) + run: | + 
SUPPORTED_PYTHON_VERSIONS=("39" "310" "311" "312" "313") + for PY_VER in "${SUPPORTED_PYTHON_VERSIONS[@]}"; do + python_path=/opt/python/cp${PY_VER}-cp${PY_VER}/bin + docker run --rm \ + -v ${{ env.OPENVINO_REPO }}:/work/src \ + -v ${{ env.INSTALL_WHEELS_DIR }}:/work/wheels \ + -v ${{ env.PIP_CACHE_PATH }}:/work/pip_cache \ + -v ov_build_cache:/work/build \ + -e SCCACHE_AZURE_BLOB_CONTAINER \ + -e SCCACHE_AZURE_CONNECTION_STRING \ + -e SCCACHE_SERVER_PORT \ + -e SCCACHE_IGNORE_SERVER_IO_ERROR \ + -e SCCACHE_CACHE_SIZE \ + -e SCCACHE_AZURE_KEY_PREFIX \ + -e CMAKE_CXX_COMPILER_LAUNCHER \ + -e CMAKE_C_COMPILER_LAUNCHER \ + -w /work/src \ + ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ + /bin/bash -c " + export PATH=${python_path}:\$PATH + PIP_VER=\$(python3 -c 'import pip; print(pip.__version__)') + export PIP_CACHE_DIR=/work/pip_cache/\${PIP_VER} + python3 -m pip install -r /work/src/src/bindings/python/wheel/requirements-dev.txt && + cmake -DOpenVINODeveloperPackage_DIR=/work/build -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON -S /work/src/src/bindings/python -B /work/build_py${PY_VER} && + cmake --build /work/build_py${PY_VER} --parallel $(nproc) --target ie_wheel --config ${{ env.CMAKE_BUILD_TYPE }} && + cmake --install /work/build_py${PY_VER} --config ${{ env.CMAKE_BUILD_TYPE }} --prefix /work/wheels --component python_wheels + " + done + + # + # Upload build artifacts + # + - name: Upload openvino package + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_package + path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz + if-no-files-found: 'error' + + - name: Upload openvino wheels + if: ${{ always() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + name: openvino_wheels + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + if-no-files-found: 'error' \ No newline at end of file diff --git a/.github/workflows/mo.yml 
b/.github/workflows/mo.yml index 151227f111c9e0..f48986d4a0d304 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -24,15 +24,15 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10' - name: Cache pip - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('tools/mo/requirements*.txt') }} diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml index ee5f3e58e363e6..4d69563a741d3a 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -19,15 +19,15 @@ jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.10' - name: Cache pip - uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('src/bindings/python/requirements*.txt') }} diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index ae0625ce4a453c..caed37eee89056 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -28,10 +28,10 @@ 
jobs: if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.9' @@ -50,7 +50,7 @@ jobs: git diff > samples_diff.diff working-directory: samples/python - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: samples_diff @@ -68,7 +68,7 @@ jobs: git diff > pyopenvino_diff.diff working-directory: src/bindings/python/src/openvino - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: pyopenvino_diff @@ -86,7 +86,7 @@ jobs: git diff > wheel_diff.diff working-directory: src/bindings/python/wheel - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: wheel_diff diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml index 687b79d2606bec..ba38d6a9f90fed 100644 --- a/.github/workflows/send_workflows_to_opentelemetry.yml +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github' diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml index 6056c20945c801..63a1fab87d566f 
100644 --- a/.github/workflows/ubuntu_20.yml +++ b/.github/workflows/ubuntu_20.yml @@ -30,7 +30,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -64,7 +64,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 2ebca2b059fdd2..f4caec8b2458a0 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -32,7 +32,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -66,7 +66,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -183,7 +183,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -227,7 +227,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: 
actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-${{ env.TEST_DEVICE }} path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -253,7 +253,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ matrix.TEST_TYPE == 'API' }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-TEMPLATE path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -459,7 +459,7 @@ jobs: popd - name: Clone OpenVINO Contrib - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} @@ -477,7 +477,7 @@ jobs: -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ -S ${OPENVINO_CONTRIB_REPO}/modules/nvidia_plugin \ -B ${NVIDIA_BUILD_DIR} - cmake --build ${NVIDIA_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests + cmake --build ${NVIDIA_BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests - name: Show ccache stats run: ${SCCACHE_PATH} --show-stats @@ -491,6 +491,7 @@ jobs: shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS iGPU: diff --git a/.github/workflows/ubuntu_22_dpcpp.yml b/.github/workflows/ubuntu_22_dpcpp.yml index 9ca27262a5dcde..48230155f7e903 100644 --- a/.github/workflows/ubuntu_22_dpcpp.yml +++ b/.github/workflows/ubuntu_22_dpcpp.yml @@ -20,7 +20,7 @@ 
jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -54,7 +54,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index bb147450438160..d874e06a189232 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -27,7 +27,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -61,7 +61,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -144,9 +144,31 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + TensorFlow_Layer_Tests: + name: TensorFlow Layer Tests + needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] + uses: ./.github/workflows/job_tensorflow_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + + 
Openvino_tokenizers: + name: OpenVINO tokenizers extension + needs: [ Build, Smart_CI, Docker ] + uses: ./.github/workflows/job_tokenizers.yml + with: + runner: 'aks-linux-4-cores-16gb' + shell: bash + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS + Overall_Status: name: ci/gha_overall_status_ubuntu_24 - needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests] + needs: [Smart_CI, Build, Debian_Packages, Samples, Python_Unit_Tests, Pytorch_Layer_Tests, TensorFlow_Layer_Tests, Openvino_tokenizers] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index c4d835637352ad..45d6c9ce98317a 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -24,7 +24,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -58,7 +58,7 @@ jobs: images: "${{ steps.handle_docker.outputs.images }}" steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/handle_docker id: handle_docker @@ -91,7 +91,7 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: webassembly_Release steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' diff --git 
a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 9c026f01e47233..6a5846b514dbd7 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -30,7 +30,7 @@ jobs: skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -73,13 +73,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: 'testdata' @@ -193,7 +193,7 @@ jobs: - name: Build C++ samples - OpenVINO build tree run: | cmake -G "${{ env.CMAKE_GENERATOR }}" -DOpenVINO_DIR=${{ env.BUILD_DIR }} -S ${{ env.INSTALL_DIR }}/samples/cpp -B ${{ env.BUILD_DIR }}/cpp_samples - cmake --build ${{ env.BUILD_DIR }}/cpp_samples --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device + cmake --build ${{ env.BUILD_DIR }}/cpp_samples --parallel $ENV:NUMBER_OF_PROCESSORS --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device - name: Build C samples - OpenVINO install tree run: | @@ -249,7 +249,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip 
@@ -257,7 +257,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -282,13 +282,13 @@ jobs: steps: - name: Clone OpenVINO - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: 'openvino' submodules: 'true' - name: Clone test models - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: 'openvinotoolkit/testdata' path: 'testdata' @@ -331,7 +331,7 @@ jobs: -B ${{ env.BUILD_DIR }} - name: Cmake build - CC ON - run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target benchmark_app --verbose + run: cmake --build ${{ env.BUILD_DIR }} --parallel $ENV:NUMBER_OF_PROCESSORS --config ${{ env.CMAKE_BUILD_TYPE }} --target benchmark_app --verbose - name: List bin files shell: cmd @@ -369,7 +369,7 @@ jobs: run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -387,7 +387,7 @@ jobs: run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ 
github.sha }} @@ -402,7 +402,7 @@ jobs: timeout-minutes: 60 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.github/workflows/windows_vs2019_debug.yml b/.github/workflows/windows_vs2019_debug.yml index f36c3f33031f7d..68a99055f5bdb8 100644 --- a/.github/workflows/windows_vs2019_debug.yml +++ b/.github/workflows/windows_vs2019_debug.yml @@ -2,6 +2,7 @@ name: Windows (VS 2019, Python 3.11, Debug) on: workflow_dispatch: merge_group: + pull_request: push: branches: - master @@ -25,7 +26,7 @@ jobs: target_branch: ${{ steps.set_target_branch.outputs.target_branch }} steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -52,10 +53,21 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_windows.yml with: - runner: 'aks-win-16-cores-32gb' + runner: 'aks-win-32-cores-128gb' affected-components: ${{ needs.smart_ci.outputs.affected_components }} build-type: 'Debug' target-branch: ${{ needs.smart_ci.outputs.target_branch }} + cmake-options: |- + -G "Ninja" ` + -DENABLE_PYTHON=OFF ` + -DENABLE_CPPLINT=OFF ` + -DBUILD_SHARED_LIBS=ON ` + -DENABLE_TESTS=ON ` + -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` + -DENABLE_STRICT_DEPENDENCIES=OFF ` + -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` + -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` + -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" CXX_Unit_Tests: name: C++ unit tests @@ -65,7 +77,9 @@ jobs: runner: 'aks-win-4-cores-8gb' affected-components: ${{ 
needs.smart_ci.outputs.affected_components }} os: 'windows_2019' - + build-type: 'Debug' + timeout-minutes: 60 + Overall_Status: name: ci/gha_overall_status_windows_debug needs: [ Smart_CI, Build, CXX_Unit_Tests ] @@ -78,4 +92,4 @@ jobs: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} - run: exit 1 + run: exit 1 \ No newline at end of file diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 8cac2b88078d15..a416f577cdb3e1 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -14,7 +14,7 @@ concurrency: env: TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} - PIP_CACHE_PATH: /mount/caches/pip/win + PIP_CACHE_PATH: "C:\\mount\\caches\\pip\\win" PYTHON_VERSION: '3.11' permissions: read-all @@ -28,7 +28,7 @@ jobs: target_branch: ${{ steps.set_target_branch.outputs.target_branch }} steps: - name: checkout action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: .github/actions/smart-ci @@ -59,6 +59,17 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} build-type: 'Release' target-branch: ${{ needs.smart_ci.outputs.target_branch }} + cmake-options: |- + -G "Ninja Multi-Config" ` + -DENABLE_PYTHON=ON ` + -DENABLE_CPPLINT=OFF ` + -DBUILD_SHARED_LIBS=ON ` + -DENABLE_TESTS=ON ` + -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` + -DENABLE_STRICT_DEPENDENCIES=OFF ` + -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` + -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` + -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" Samples: needs: [ Build, Smart_CI ] @@ -98,7 +109,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: 
Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -165,7 +176,7 @@ jobs: steps: - name: Fetch OpenVINO JS sources - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | src/bindings/js @@ -178,7 +189,7 @@ jobs: path: ${{ env.OPENVINO_JS_LIBS_DIR }} - name: Setup Node ${{ env.NODE_VERSION }} - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version: ${{ env.NODE_VERSION }} @@ -224,6 +235,7 @@ jobs: runner: 'aks-win-4-cores-8gb' shell: pwsh affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' if: fromJSON(needs.smart_ci.outputs.affected_components).TOKENIZERS Python_Unit_Tests: @@ -263,7 +275,7 @@ jobs: working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -275,7 +287,7 @@ jobs: with: version: ${{ env.PYTHON_VERSION }} pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'false' + should-setup-pip-paths: 'true' self-hosted-runner: 'true' - name: Install OpenVINO Python wheels @@ -391,7 +403,7 @@ jobs: run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: 
name: test-results-python @@ -424,6 +436,8 @@ jobs: runner: 'aks-win-4-cores-8gb' affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'windows_2019' + build-type: 'Release' + timeout-minutes: 50 CPU_Functional_Tests: name: CPU functional tests @@ -463,7 +477,7 @@ jobs: popd - name: Fetch setup_python action - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml @@ -481,7 +495,7 @@ jobs: run: python3 -m pip install -r ${{ github.workspace }}\install\tests\functional_test_utils\layer_tests_summary\requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} @@ -495,14 +509,14 @@ jobs: timeout-minutes: 60 - name: Save tests execution time - uses: actions/cache/save@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 if: github.ref_name == 'master' with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 7da00df4e46d32..55ecc2500635b1 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -3,11 +3,17 @@ name: Rerun Workflow with Known Errors on: workflow_run: workflows: - - Linux (Ubuntu 20.04, Python 3.11) + 
- Linux (Ubuntu 20.04, Python 3.9) + - Linux (Ubuntu 22.04, Python 3.11) + - Linux (Ubuntu 24.04, Python 3.12) + - Debian 10 ARM + - Android ARM64 with vcpkg + - Android x64 - Linux ARM64 (Ubuntu 20.04, Python 3.11) - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang) - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10) - - Windows (VS 2019, Python 3.11) + - Windows (VS 2019, Python 3.11, Release) + - Windows (VS 2019, Python 3.11, Debug) - Windows Conditional Compilation (VS 2022, Python 3.11) types: - completed @@ -31,7 +37,7 @@ jobs: checks: read steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github/scripts/workflow_rerun' @@ -56,13 +62,17 @@ jobs: if: ${{ env.PIPELINE_RETRIGGERED == 'true' }} run: echo "Rerun retriggered for ${{ github.event.workflow_run.html_url }} with ticket ${{ env.FOUND_ERROR_TICKET }}" + - name: ${{ github.event.workflow_run.html_url }} + if: ${{ env.PIPELINE_RETRIGGERED == 'true' }} + run: echo "Step for statistics gathering" + rerunner_tests: name: Rerunner Tests if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }} runs-on: aks-linux-2-cores-8gb steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: '.github/scripts/workflow_rerun' lfs: true diff --git a/.gitmodules b/.gitmodules index a9cad1dee5f494..5feb7458da1801 100644 --- a/.gitmodules +++ b/.gitmodules @@ -78,6 +78,9 @@ [submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"] path = src/plugins/intel_npu/thirdparty/level-zero-ext url = https://github.com/intel/level-zero-npu-extensions.git +[submodule "src/plugins/intel_npu/thirdparty/yaml-cpp"] + path = src/plugins/intel_npu/thirdparty/yaml-cpp + url = https://github.com/jbeder/yaml-cpp.git 
[submodule "thirdparty/telemetry"] path = thirdparty/telemetry url = https://github.com/openvinotoolkit/telemetry.git diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000000000..5044453266940d --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,119 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will 
take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +openvino_codeofconduct At intel DOT com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. diff --git a/README.md b/README.md index c37f2ef42b9785..695f84f1628118 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,11 @@ Check [system requirements](https://docs.openvino.ai/2024/about-openvino/system- [OpenVINO Quickstart example](https://docs.openvino.ai/2024/get-started.html) will walk you through the basics of deploying your first model. 
Learn how to optimize and deploy popular models with the [OpenVINO Notebooks](https://github.com/openvinotoolkit/openvino_notebooks)📚: -- [Create an LLM-powered Chatbot using OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/llm-chatbot.ipynb) -- [YOLOv8 Optimization](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/quantizing-model-with-accuracy-control/yolov8-quantization-with-accuracy-control.ipynb) -- [Text-to-Image Generation](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb) +- [Create an LLM-powered Chatbot using OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb) +- [YOLOv11 Optimization](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/yolov11-optimization/yolov11-object-detection.ipynb) +- [Text-to-Image Generation](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/text-to-image-genai/text-to-image-genai.ipynb) +- [Multimodal assistant with LLaVa and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb) +- [Automatic speech recognition using Whisper and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb) Here are easy-to-follow code examples demonstrating how to run PyTorch and TensorFlow model inference using OpenVINO: diff --git a/SECURITY.md b/SECURITY.md index eb482d90983db3..1840023d700663 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,5 +1,10 @@ # Security Policy +## Security practices + +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/9611/badge)](https://www.bestpractices.dev/projects/9611) 
+[![Coverity](https://scan.coverity.com/projects/21921/badge.svg)](https://scan.coverity.com/projects/openvino) + ## Report a Vulnerability Please report security issues or vulnerabilities to the [Intel® Security Center]. diff --git a/cmake/developer_package/compile_flags/sanitizer.cmake b/cmake/developer_package/compile_flags/sanitizer.cmake index 73f109d726c88b..5fc24c4f862239 100644 --- a/cmake/developer_package/compile_flags/sanitizer.cmake +++ b/cmake/developer_package/compile_flags/sanitizer.cmake @@ -17,7 +17,7 @@ if (ENABLE_SANITIZER) "https://github.com/openvinotoolkit/openvino/wiki/AddressSanitizer-and-LeakSanitizer") endif() elseif(OV_COMPILER_IS_CLANG) - set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/asan/ignore.txt") + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/sanitizers/asan/ignore.txt") if(BUILD_SHARED_LIBS) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -shared-libasan") endif() @@ -27,7 +27,7 @@ if (ENABLE_SANITIZER) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address") endif() - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/asan/ignore.txt") + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/sanitizers/asan/ignore.txt") if(BUILD_SHARED_LIBS) set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -shared-libasan") endif() @@ -89,7 +89,11 @@ if(ENABLE_THREAD_SANITIZER) message(FATAL_ERROR "Thread sanitizer is not supported in Windows with MSVC compiler. 
Please, use clang-cl or mingw") elseif(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=thread") - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=thread") + if(OV_COMPILER_IS_CLANG) + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -ltsan") + else() + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=thread") + endif() else() message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") endif() diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake index d2aa0410476245..0815297a11a5eb 100644 --- a/cmake/developer_package/frontends/frontends.cmake +++ b/cmake/developer_package/frontends/frontends.cmake @@ -304,6 +304,9 @@ macro(ov_add_frontend) # then we need to mark it to be CXX ABI free ov_abi_free_target(${TARGET_NAME}) + # public target name + set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME}) + # installation if(NOT OV_FRONTEND_SKIP_INSTALL) @@ -351,9 +354,6 @@ macro(ov_add_frontend) COMPONENT ${dev_component} ${OV_CPACK_COMP_CORE_DEV_EXCLUDE_ALL} FILES_MATCHING PATTERN "*.hpp") - - # public target name - set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME}) endif() else() # skipped frontend has to be installed in static libraries case diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 0ec054da853e2c..4ac0124d3089f0 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -30,7 +30,7 @@ macro(ov_common_libraries_cpack_set_dirs) ov_get_pyversion(pyversion) if(pyversion) - # should not be used in production; only by setup.py install + # should not be used in production; only by pip install set(OV_CPACK_PYTHONDIR lib/${pyversion}/site-packages) endif() @@ 
-94,7 +94,7 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) - # we don't pack artifacts of setup.py install, because it's called explicitly in conda / brew + # we don't pack artifacts of pip install, because it's called explicitly in conda / brew # or not used at all like in cases with conan / vcpkg set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) # we don't need wheels in the distribution packages diff --git a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake index 2b95fcfde5c145..a23d5290044e3d 100644 --- a/cmake/developer_package/packaging/debian/debian.cmake +++ b/cmake/developer_package/packaging/debian/debian.cmake @@ -95,12 +95,12 @@ macro(ov_define_component_include_rules) endif() # python if(ENABLE_PYTHON_PACKAGING) - # pack artifacts of setup.py install + # pack artifacts of pip install unset(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL) else() set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) endif() - # we don't pack python components itself, we pack artifacts of setup.py install + # we don't pack python components itself, we pack artifacts of pip install set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index 45d9b0c0ca2121..bb4e7942d7640b 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -86,12 +86,12 @@ 
macro(ov_define_component_include_rules) endif() # python if(ENABLE_PYTHON_PACKAGING) - # pack artifacts of setup.py install + # pack artifacts of pip install unset(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL) else() set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) endif() - # we don't pack python components itself, we pack artifacts of setup.py install + # we don't pack python components itself, we pack artifacts of pip install set(OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) set(OV_CPACK_COMP_OVC_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index 0eb020a5584f1c..59b312963c180d 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -98,6 +98,7 @@ macro(ov_cpack_settings) 2024.2.0 2024.3.0 2024.4.0 + 2024.5.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index cf915ee852417e..a4a63c35858bf9 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -86,6 +86,7 @@ macro(ov_cpack_settings) 2024.2.0 2024.3.0 2024.4.0 + 2024.5.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/toolchains/mt.runtime.win32.toolchain.cmake b/cmake/toolchains/mt.runtime.win32.toolchain.cmake index 7dd4e1e7f96ded..b331d370bfe7bf 100644 --- a/cmake/toolchains/mt.runtime.win32.toolchain.cmake +++ b/cmake/toolchains/mt.runtime.win32.toolchain.cmake @@ -27,6 +27,11 @@ if(use_static_runtime) foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO") set(flag_var "CMAKE_${lang}_FLAGS${build_type}_INIT") string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + if (build_type STREQUAL "_DEBUG") + set(${flag_var} "/MTd") + else() + set(${flag_var} "/MT") + endif() endforeach() endforeach() endif() diff --git 
a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake b/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake similarity index 95% rename from cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake rename to cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake index f5e9e68aabedc6..5bc16de8df91e8 100644 --- a/cmake/toolchains/riscv64-071-thead-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-071-xuantie-gnu.toolchain.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use T-Head compiler: +# NOTE: use Xuantie compiler: # git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git # ./configure --prefix=/opt/riscv # make linux @@ -22,10 +22,10 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV64_THEAD ON) +set(RISCV64_XUANTIE ON) set(RISCV64_RVV0p7 ON) -set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) diff --git a/cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake b/cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake similarity index 95% rename from cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake rename to cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake index e00e30f975598f..0664b38a9ba68d 100644 --- a/cmake/toolchains/riscv64-100-thead-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-100-xuantie-gnu.toolchain.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use T-Head compiler: +# NOTE: use Xuantie compiler: # git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git # ./configure --prefix=/opt/riscv # make linux @@ -22,10 +22,10 @@ set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) 
-set(RISCV64_THEAD ON) +set(RISCV64_XUANTIE ON) set(RISCV64_RVV1p0 ON) -set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT $ENV{RISCV_TOOLCHAIN_ROOT} CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) diff --git a/cmake/toolchains/riscv64-gnu.toolchain.cmake b/cmake/toolchains/riscv64-gnu.toolchain.cmake index 994b05f66b52f6..b58dcf169fc2da 100644 --- a/cmake/toolchains/riscv64-gnu.toolchain.cmake +++ b/cmake/toolchains/riscv64-gnu.toolchain.cmake @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 # -# NOTE: use with the following docker image https://github.com/Incarnation-p-lee/riscv-docker-emulator#llvm-clang-tool-chain +# NOTE: use with the following docker image https://github.com/Incarnation-p-lee/riscv-docker-emulator#gnu-toolchain set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(RISCV_TOOLCHAIN_ROOT "/opt/riscv/gnu-toolchain/rv64-linux" CACHE PATH "Path to CLANG for RISC-V cross compiler build directory") +set(RISCV_TOOLCHAIN_ROOT "/opt/riscv/gnu-toolchain/rv64-linux" CACHE PATH "Path to GCC for RISC-V cross compiler build directory") set(CMAKE_SYSROOT "${RISCV_TOOLCHAIN_ROOT}/sysroot" CACHE PATH "RISC-V sysroot") set(CMAKE_C_COMPILER_TARGET riscv64-unknown-linux-gnu) @@ -26,9 +26,6 @@ set(CMAKE_OBJDUMP ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-objdump) set(CMAKE_READELF ${RISCV_TOOLCHAIN_ROOT}/bin/riscv64-unknown-linux-gnu-readelf) set(PKG_CONFIG_EXECUTABLE "NOT-FOUND" CACHE PATH "Path to RISC-V pkg-config") -# Don't run the linker on compiler check -set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - set(CMAKE_SHARED_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") set(CMAKE_EXE_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") 
set(CMAKE_MODULE_LINKER_FLAGS_INIT "-L${CMAKE_SYSROOT}/lib") diff --git a/cmake/toolchains/riscv64.linux.toolchain.cmake b/cmake/toolchains/riscv64.linux.toolchain.cmake new file mode 100644 index 00000000000000..cb088f5eca5052 --- /dev/null +++ b/cmake/toolchains/riscv64.linux.toolchain.cmake @@ -0,0 +1,13 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Install compiler on debian using: +# apt-get install -y gcc-riscv64-linux-gnu g++-riscv64-linux-gnu binutils-riscv64-linux-gnu + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(CMAKE_C_COMPILER riscv64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER riscv64-linux-gnu-g++) +set(CMAKE_STRIP riscv64-linux-gnu-strip) diff --git a/conan.lock b/conan.lock index a21a2a8d7b52f8..f78ad37f8f69ac 100644 --- a/conan.lock +++ b/conan.lock @@ -10,7 +10,7 @@ "opencl-icd-loader/2023.04.17#5f73dd9f0c023d416a7f162e320b9c77%1692732261.088", "opencl-headers/2023.04.17#3d98f2d12a67c2400de6f11d5335b5a6%1683936272.16", "opencl-clhpp-headers/2023.04.17#7c62fcc7ac2559d4839150d2ebaac5c8%1685450803.672", - "onnx/1.16.0#4d2d4f24d6f73b8a7551e001839631f0%1712404811.278", + "onnx/1.17.0#c79fdfca3ae149874153de15a20f4598%1727864447.241", "onetbb/2021.10.0#cbb2fc43088070b48f6e4339bc8fa0e1%1693812561.235", "ittapi/3.24.0#9246125f13e7686dee2b0c992b71db94%1682969872.743", "hwloc/2.9.2#1c63e2eccac57048ae226e6c946ebf0e%1688677682.002", @@ -33,4 +33,4 @@ ], "python_requires": [], "config_requires": [] -} \ No newline at end of file +} diff --git a/conanfile.txt b/conanfile.txt index f124179d52bf12..46d6d8d65d34e9 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -7,7 +7,7 @@ opencl-icd-loader/[>=2023.04.17] rapidjson/[>=1.1.0] xbyak/[>=6.62] snappy/[>=1.1.7] -onnx/1.16.0 +onnx/1.17.0 pybind11/[>=2.12.0] flatbuffers/[>=22.9.24] diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst 
b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index b9f15bbe12cccc..3f3d0064e8a4c6 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -15,23 +15,25 @@ deep learning models: | For their usage guides, see :doc:`Devices and Modes <../../openvino-workflow/running-inference/inference-devices-and-modes>`. | For a detailed list of devices, see :doc:`System Requirements <../release-notes-openvino/system-requirements>`. + Beside running inference with a specific device, OpenVINO offers the option of running automated inference with the following inference modes: | :doc:`Automatic Device Selection <../../openvino-workflow/running-inference/inference-devices-and-modes/auto-device-selection>`: -| automatically selects the best device available for the given task. It offers many - additional options and optimizations, including inference on multiple devices at the - same time. +| automatically selects the best device available for the given task. It offers many + additional options and optimizations, including inference on multiple devices at the + same time. + | :doc:`Heterogeneous Inference <../../openvino-workflow/running-inference/inference-devices-and-modes/hetero-execution>`: -| enables splitting inference among several devices automatically, for example, if one device - doesn't support certain operations. +| enables splitting inference among several devices automatically, for example, if one device + doesn't support certain operations. | :doc:`Automatic Batching <../../openvino-workflow/running-inference/inference-devices-and-modes/automatic-batching>`: -| automatically groups inference requests to improve device utilization. +| automatically groups inference requests to improve device utilization. 
| :doc:`(LEGACY) Multi-device Inference <./../../documentation/legacy-features/multi-device>`: -| executes inference on multiple devices. Currently, this mode is considered a legacy - solution. Using Automatic Device Selection instead is advised. +| executes inference on multiple devices. Currently, this mode is considered a legacy + solution. Using Automatic Device Selection instead is advised. Feature Support and API Coverage @@ -57,30 +59,42 @@ Feature Support and API Coverage +-------------------------+-----------+------------------+-------------------+ | **API Coverage:** | plugin | infer_request | compiled_model | +=========================+===========+==================+===================+ -| CPU | 80.0 % | 100.0 % | 89.74 % | +| CPU | 98.31 % | 100.0 % | 90.7 % | +-------------------------+-----------+------------------+-------------------+ | CPU_ARM | 80.0 % | 100.0 % | 89.74 % | +-------------------------+-----------+------------------+-------------------+ -| GPU | 84.0 % | 100.0 % | 100.0 % | +| GPU | 91.53 % | 100.0 % | 100.0 % | +-------------------------+-----------+------------------+-------------------+ -| dGPU | 82.0 % | 100.0 % | 100.0 % | +| dGPU | 89.83 % | 100.0 % | 100.0 % | +-------------------------+-----------+------------------+-------------------+ -| NPU | 16.0 % | 0.0 % | 10.26 % | +| NPU | 18.64 % | 0.0 % | 9.3 % | +-------------------------+-----------+------------------+-------------------+ -| AUTO | 40.0 % | 100.0 % | 97.44 % | +| AUTO | 93.88 % | 100.0 % | 100.0 % | +-------------------------+-----------+------------------+-------------------+ -| BATCH | 26.0 % | 100.0 % | 58.97 % | +| BATCH | 86.05 % | 100.0 % | 86.05 % | +-------------------------+-----------+------------------+-------------------+ -| HETERO | 30.0 % | 99.23 % | 58.97 % | +| HETERO | 61.22 % | 99.24 % | 86.05 % | +-------------------------+-----------+------------------+-------------------+ | || Percentage of API supported by the device, | -| || as of OpenVINO 
2023.3, 08 Jan, 2024. | +| || as of OpenVINO 2024.4, 25 Oct, 2024. | +-------------------------+-----------+------------------+-------------------+ For setting up a relevant configuration, refer to the :doc:`Integrate with Customer Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` topic (step 3 "Configure input and output"). +.. dropdown:: Device support across OpenVINO 2024.4 distributions + + =============== ========== ====== =============== ======== ============ ========== ========== ========== + Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm + =============== ========== ====== =============== ======== ============ ========== ========== ========== + CPU V V V V V V V V + GPU V V V V V V V V + NPU V\* V\* V\ * n/a n/a n/a n/a V\* + =============== ========== ====== =============== ======== ============ ========== ========== ========== + + | \* **Of the Linux systems, versions 22.04 and 24.04 include drivers for NPU.** + | **For Windows, CPU inference on ARM64 is not supported.** .. note:: @@ -89,6 +103,7 @@ topic (step 3 "Configure input and output"). in your solutions, revert to the 2023.3 (LTS) version. 
With the OpenVINO™ 2023.0 release, support has been cancelled for: + - Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X - Intel® Vision Accelerator Design with Intel® Movidius™ diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst index 5bd414776784ce..d27f7626391f46 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-operations.rst @@ -9,6 +9,8 @@ Here, you will find comprehensive information on operations supported by OpenVIN conformance reports provide operation coverage for inference devices, while the tables list operations available for all OpenVINO framework frontends. +Data as of OpenVINO 2024.4, 18 Oct. 2024. + **Device-operation conformance reports:** .. grid:: 1 1 2 2 @@ -32,6 +34,7 @@ operations available for all OpenVINO framework frontends. ops including dynamic inputs + **Operations supported by OpenVINO frontend Frameworks:** .. tab-set:: diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 40b94210f6c43d..75c7ba90db7e76 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -13,17 +13,15 @@ Performance Benchmarks Efficient LLMs for AI PC Performance Information F.A.Q. OpenVINO Accuracy - Getting Performance Numbers + Getting Performance Numbers -This page presents benchmark results for +This page presents benchmark results for the `Intel® Distribution of OpenVINO™ toolkit `__ and :doc:`OpenVINO Model Server <../openvino-workflow/model-server/ovms_what_is_openvino_model_server>`, for a representative selection of public neural networks and Intel® devices. 
The results may help you decide which hardware to use in your applications or plan AI workload for the hardware you have already implemented in your solutions. Click the buttons below to see the chosen benchmark data. -For a more detailed view of performance numbers for generative AI models, check the -:doc:`Generative AI Benchmark Results <./performance-benchmarks/generative-ai-performance>` .. grid:: 1 1 2 2 :gutter: 4 @@ -36,7 +34,7 @@ For a more detailed view of performance numbers for generative AI models, check :outline: :expand: - :material-regular:`bar_chart;1.4em` OpenVINO Benchmark Graphs + :material-regular:`bar_chart;1.4em` OpenVINO Benchmark Graphs (general) .. grid-item:: @@ -46,10 +44,35 @@ For a more detailed view of performance numbers for generative AI models, check :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS Benchmark Graphs + :material-regular:`bar_chart;1.4em` OVMS Benchmark Graphs (general) + + .. grid-item:: + + .. button-link:: ./performance-benchmarks/generative-ai-performance.html + :class: ov-toolkit-benchmark-genai + :color: primary + :outline: + :expand: + + :material-regular:`table_view;1.4em` LLM performance for AI PC + + .. grid-item:: + + .. button-link:: # + :class: ovms-toolkit-benchmark-llm-result + :color: primary + :outline: + :expand: + + :material-regular:`bar_chart;1.4em` OVMS for GenAI + + + + -Key performance indicators and workload parameters. + +**Key performance indicators and workload parameters** .. tab-set:: @@ -65,13 +88,13 @@ Key performance indicators and workload parameters. .. tab-item:: Latency :sync: latency - For Vision and NLP models this mhis measures the synchronous execution of inference requests and is reported in - milliseconds. Each inference request (for example: preprocess, infer, postprocess) is - allowed to complete before the next is started. This performance metric is relevant in - usage scenarios where a single image input needs to be acted upon as soon as possible. 
An - example would be the healthcare sector where medical personnel only request analysis of a - single ultra sound scanning image or in real-time or near real-time applications for - example an industrial robot's response to actions in its environment or obstacle avoidance + For Vision and NLP models this measures the synchronous execution of inference requests and + is reported in milliseconds. Each inference request (for example: preprocess, infer, + postprocess) is allowed to complete before the next one starts. This performance metric is + relevant in usage scenarios where a single image input needs to be acted upon as soon as + possible. An example would be the healthcare sector where medical personnel only request + analysis of a single ultra sound scanning image or in real-time or near real-time applications + such as an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles. For Transformer models like Stable-Diffusion this measures the time it takes to convert the prompt or input text into a finished image. It is presented in seconds. @@ -97,9 +120,10 @@ Key performance indicators and workload parameters. * input token length: 1024 (the tokens for GenAI models are in English). -.. raw:: html +**Platforms, Configurations, Methodology** -

Platforms, Configurations, Methodology

+To see the methodology used to obtain the numbers and learn how to test performance yourself, +see the guide on :doc:`getting performance numbers `. For a listing of all platforms and configurations used for testing, refer to the following: @@ -130,59 +154,10 @@ For a listing of all platforms and configurations used for testing, refer to the :material-regular:`download;1.5em` Click for Performance Data [XLSX] -The OpenVINO benchmark setup includes a single system with OpenVINO™, as well as the benchmark -application installed. It measures the time spent on actual inference (excluding any pre or post -processing) and then reports on the inferences per second (or Frames Per Second). - -OpenVINO™ Model Server (OVMS) employs the Intel® Distribution of OpenVINO™ toolkit runtime -libraries and exposes a set of models via a convenient inference API over gRPC or HTTP/REST. -Its benchmark results are measured with the configuration of multiple-clients-single-server, -using two hardware platforms connected by ethernet. Network bandwidth depends on both platforms -and models used. It is set not to be a bottleneck for workload intensity. The connection is -dedicated only to measuring performance. - -.. dropdown:: See more details about OVMS benchmark setup - - The benchmark setup for OVMS consists of four main parts: - .. image:: ../assets/images/performance_benchmarks_ovms_02.png - :alt: OVMS Benchmark Setup Diagram - * **OpenVINO™ Model Server** is launched as a docker container on the server platform and it - listens to (and answers) requests from clients. OpenVINO™ Model Server is run on the same - system as the OpenVINO™ toolkit benchmark application in corresponding benchmarking. Models - served by OpenVINO™ Model Server are located in a local file system mounted into the docker - container. The OpenVINO™ Model Server instance communicates with other components via ports - over a dedicated docker network. 
- * **Clients** are run in separated physical machine referred to as client platform. Clients - are implemented in Python3 programming language based on TensorFlow* API and they work as - parallel processes. Each client waits for a response from OpenVINO™ Model Server before it - will send a new next request. The role played by the clients is also verification of - responses. - - * **Load balancer** works on the client platform in a docker container. HAProxy is used for - this purpose. Its main role is counting of requests forwarded from clients to OpenVINO™ - Model Server, estimating its latency, and sharing this information by Prometheus service. - The reason of locating the load balancer on the client site is to simulate real life - scenario that includes impact of physical network on reported metrics. - - * **Execution Controller** is launched on the client platform. It is responsible for - synchronization of the whole measurement process, downloading metrics from the load - balancer, and presenting the final report of the execution. - - - -.. raw:: html - -

Test performance yourself

- -You can also test performance for your system yourself, following the guide on -:doc:`getting performance numbers `. - -.. raw:: html - -

Disclaimers

+**Disclaimers** * Intel® Distribution of OpenVINO™ toolkit performance results are based on release 2024.3, as of July 31, 2024. @@ -192,12 +167,11 @@ You can also test performance for your system yourself, following the guide on The results may not reflect all publicly available updates. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software, or service -activation. Learn more at intel.com, or from the OEM or retailer. +activation. Learn more at intel.com, the OEM, or retailer. See configuration disclosure for details. No product can be absolutely secure. Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `__. -Your costs and results may vary. Intel optimizations, for Intel compilers or other products, may not optimize to the same degree for non-Intel products. @@ -205,9 +179,6 @@ for non-Intel products. - - - .. raw:: html diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 35e09f91f72b9c..b8256af650e2f8 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -3,9 +3,11 @@ Most Efficient Large Language Models for AI PC This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. +The current data is as of OpenVINO 2024.4, 24 Oct. 2024 The tables below list the key performance indicators for a selection of Large Language Models, -running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. +running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and Intel® Core™ Ultra +7-288V based system, on built-in GPUs. 
@@ -15,32 +17,38 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. -.. tab-set:: +.. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models.csv - .. tab-item:: OpenVINO - - .. csv-table:: - :class: modeldata stripe - :name: supportedModelsTableOv - :header-rows: 1 - :file: ../../_static/download/llm_models.csv +| +.. grid:: 1 1 2 2 + :gutter: 4 -For complete information on the system config, see: -`Hardware Platforms [PDF] `__ + .. grid-item:: -To view the data in an editable form, you can download the .csv file here: + All models listed here were tested with the following parameters: -.. grid:: 1 1 2 2 - :gutter: 4 + * Framework: PyTorch + * Beam: 1 + * Batch size: 1 .. grid-item:: - .. button-link:: ../../_static/download/llm_models.csv + .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/llm_models_platform_list_.pdf :color: primary :outline: :expand: - :material-regular:`download;1.5em` Click for OpenVINO LLM results [CSV] + :material-regular:`download;1.5em` Get system descriptions [PDF] + + .. button-link:: ../../_static/benchmarks_files/llm_models.csv + :color: primary + :outline: + :expand: + :material-regular:`download;1.5em` Get the data in .csv [CSV] diff --git a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst index 069c940063cf14..936f1145a6b3b0 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst @@ -1,124 +1,201 @@ Getting Performance Numbers =========================== +1. `Benchmarking methodology for OpenVINO <#benchmarking-methodology-for-openvino>`__ + a. `OpenVINO benchmarking (general) <#openvino-benchmarking--general->`__ + b. 
`OpenVINO Model Server benchmarking (general) <#openvino-model-server-benchmarking--general->`__ + c. `OpenVINO Model Server benchmarking (LLM) <#openvino-model-server-benchmarking--llm->`__ -This guide explains how to use the benchmark_app to get performance numbers. It also explains how the performance -numbers are reflected through internal inference performance counters and execution graphs. It also includes -information on using ITT and Intel® VTune™ Profiler to get performance insights. +2. `How to obtain benchmark results <#how-to-obtain-benchmark-results>`__ + a. `General considerations <#general-considerations>`__ + b. `OpenVINO benchmarking (general) <#openvino-benchmarking--general->`__ + c. `OpenVINO benchmarking (LLM) <#openvino-benchmarking--llm->`__ -.. raw:: html -

Test performance with the benchmark_app

+Benchmarking methodology for OpenVINO +############################################################################################### -You can run OpenVINO benchmarks in both C++ and Python APIs, yet the experience differs in each case. -The Python one is part of OpenVINO Runtime installation, while C++ is available as a code sample. -For a detailed description, see: :doc:`benchmark_app <../../learn-openvino/openvino-samples/benchmark-tool>`. +OpenVINO benchmarking (general) +++++++++++++++++++++++++++++++++++++++++++++ -Make sure to install the latest release package with support for frameworks of the models you want to test. -For the most reliable performance benchmarks, :doc:`prepare the model for use with OpenVINO <../../openvino-workflow/model-preparation>`. +The OpenVINO benchmark setup includes a single system with OpenVINO™, as well as the benchmark +application installed. It measures the time spent on actual inference (excluding any pre or post +processing) and then reports on the inferences per second (or Frames Per Second). +OpenVINO Model Server benchmarking (general) +++++++++++++++++++++++++++++++++++++++++++++ -.. raw:: html +OpenVINO™ Model Server (OVMS) employs the Intel® Distribution of OpenVINO™ toolkit runtime +libraries and exposes a set of models via a convenient inference API over gRPC or HTTP/REST. +Its benchmark results are measured with the configuration of multiple-clients-single-server, +using two hardware platforms connected by ethernet. Network bandwidth depends on both platforms +and models used. It is set not to be a bottleneck for workload intensity. The connection is +dedicated only to measuring performance. -

Running the benchmark application

+.. dropdown:: See more details about OVMS benchmark setup + The benchmark setup for OVMS consists of four main parts: -The benchmark_app includes a lot of device-specific options, but the primary usage is as simple as: + .. image:: ../../assets/images/performance_benchmarks_ovms_02.png + :alt: OVMS Benchmark Setup Diagram -.. code-block:: sh + * **OpenVINO™ Model Server** is launched as a docker container on the server platform and it + listens to (and answers) requests from clients. OpenVINO™ Model Server is run on the same + system as the OpenVINO™ toolkit benchmark application in corresponding benchmarking. Models + served by OpenVINO™ Model Server are located in a local file system mounted into the docker + container. The OpenVINO™ Model Server instance communicates with other components via ports + over a dedicated docker network. - benchmark_app -m -d -i + * **Clients** are run on a separate physical machine, referred to as the client platform. Clients + are implemented in Python3 programming language based on TensorFlow* API and they work as + parallel processes. Each client waits for a response from OpenVINO™ Model Server before it + sends the next request. The role played by the clients is also verification of + responses. + * **Load balancer** works on the client platform in a docker container. HAProxy is used for + this purpose. Its main role is counting of requests forwarded from clients to OpenVINO™ + Model Server, estimating its latency, and sharing this information by Prometheus service. + The reason for locating the load balancer on the client site is to simulate a real-life + scenario that includes the impact of the physical network on reported metrics. -Each of the :doc:`OpenVINO supported devices <../compatibility-and-support/supported-devices>` offers -performance settings that contain command-line equivalents in the Benchmark app. + * **Execution Controller** is launched on the client platform.
It is responsible for + synchronization of the whole measurement process, downloading metrics from the load + balancer, and presenting the final report of the execution. -While these settings provide really low-level control for the optimal model performance on the *specific* device, -it is recommended to always start performance evaluation with the :doc:`OpenVINO High-Level Performance Hints <../../openvino-workflow/running-inference/optimize-inference/high-level-performance-hints>` first, like so: -.. code-block:: sh +OpenVINO Model Server benchmarking (LLM) +++++++++++++++++++++++++++++++++++++++++ - # for throughput prioritization - benchmark_app -hint tput -m -d - # for latency prioritization - benchmark_app -hint latency -m -d +In the benchmarking results presented here, the load from clients is simulated using the +benchmark_serving.py script from vLLM and the ShareGPT dataset. It represents real life usage +scenarios. Both OpenVINO Model Server and vLLM expose OpenAI-compatible REST endpoints so the +methodology is identical. +In the experiments, we change the average request rate to identify the tradeoff between total +throughput and the TPOT latency. +Note that in the benchmarking, the feature of prefix_caching is not used. -.. raw:: html -

Additional benchmarking considerations

-.. raw:: html +How to obtain benchmark results +############################################################################################### -

1 - Select a Proper Set of Operations to Measure

+General considerations +++++++++++++++++++++++ +.. dropdown:: Select a proper set of operations to measure -When evaluating performance of a model with OpenVINO Runtime, it is required to measure a proper set of operations. + When evaluating performance of a model with OpenVINO Runtime, it is required to measure a + proper set of operations. -- Avoid including one-time costs such as model loading. -- Track operations that occur outside OpenVINO Runtime (such as video decoding) separately. + * Avoid including one-time costs such as model loading. + * Track operations that occur outside OpenVINO Runtime, such as video decoding, separately. + .. note:: -.. note:: + Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. + For more information, refer to + :doc:`Embedding Pre-processing <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` + and + :doc:`General Runtime Optimizations <../../openvino-workflow/running-inference/optimize-inference/general-optimizations>`. - Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. For more information, - refer to :doc:`Embedding Pre-processing <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` and - :doc:`General Runtime Optimizations <../../openvino-workflow/running-inference/optimize-inference/general-optimizations>`. +.. dropdown:: Maximize the chance to obtain credible data + Performance conclusions should be built on reproducible data. As for the performance + measurements, they should be done with a large number of invocations of the same routine.
+ Since the first iteration is almost always significantly slower than the subsequent ones, + an aggregated value can be used for the execution time for final projections: + * If the warm-up run does not help or execution times still vary, you can try running a + large number of iterations and then use the mean value of the results. + * If time values differ too much, consider using a geomean. + * Be aware of potential power-related irregularities, such as throttling. A device may assume + one of several different power states, so it is advisable to fix its frequency when + optimizing, for better performance data reproducibility. + * Note that end-to-end application benchmarking should also be performed under real + operational conditions. -.. raw:: html +.. dropdown:: Compare performance with native/framework code -

2 - Try to Get Credible Data

+ When comparing OpenVINO Runtime performance with the framework or reference code, + make sure that both versions are as similar as possible: -Performance conclusions should be build upon reproducible data. As for the performance measurements, they should -be done with a large number of invocations of the same routine. Since the first iteration is almost always significantly -slower than the subsequent ones, an aggregated value can be used for the execution time for final projections: + * Wrap the exact inference execution (for examples, see :doc:`Benchmark app <../../learn-openvino/openvino-samples/benchmark-tool>`). + * Do not include model loading time. + * Ensure that the inputs are identical for OpenVINO Runtime and the framework. For example, watch out for random values that can be used to populate the inputs. + * In situations when any user-side pre-processing should be tracked separately, consider :doc:`image pre-processing and conversion <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>`. + * When applicable, leverage the :doc:`Dynamic Shapes support <../../openvino-workflow/running-inference/dynamic-shapes>`. + * If possible, demand the same accuracy. For example, TensorFlow allows ``FP16`` execution, so when comparing to that, make sure to test the OpenVINO Runtime with the ``FP16`` as well. -- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations - and then average or find a mean of the results. -- If the time values range too much, consider geomean. -- Be aware of the throttling and other power oddities. A device can exist in one of several different power states. - When optimizing your model, consider fixing the device frequency for better performance data reproducibility. - However, the end-to-end (application) benchmarking should also be performed under real operational conditions. +.. 
dropdown:: Make sure the benchmarking setup is proper for the selected scenario + * Install the latest release package supporting the frameworks of the tested models. + * For the most reliable performance benchmarks, + :doc:`prepare the model for use with OpenVINO <../../openvino-workflow/model-preparation>`. + * For testing generative AI models, make sure you select the method that best suits your case, + Optimum-Intel or the OpenVINO GenAI package. -.. raw:: html -

3 - Compare Performance with Native/Framework Code

+OpenVINO benchmarking (general) ++++++++++++++++++++++++++++++++ -When comparing the OpenVINO Runtime performance with the framework or another reference code, make sure that both versions are as similar as possible: +The default way of measuring OpenVINO performance is running a piece of code, referred to as +:doc:`the benchmark tool <../../learn-openvino/openvino-samples/benchmark-tool>`. +For Python, it is part of the OpenVINO Runtime installation, while for C++, it is available as +a code sample. -- Wrap the exact inference execution (for examples, see :doc:`Benchmark app <../../learn-openvino/openvino-samples/benchmark-tool>`). -- Do not include model loading time. -- Ensure that the inputs are identical for OpenVINO Runtime and the framework. For example, watch out for random values that can be used to populate the inputs. -- In situations when any user-side pre-processing should be tracked separately, consider :doc:`image pre-processing and conversion <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>`. -- When applicable, leverage the :doc:`Dynamic Shapes support <../../openvino-workflow/running-inference/dynamic-shapes>`. -- If possible, demand the same accuracy. For example, TensorFlow allows ``FP16`` execution, so when comparing to that, make sure to test the OpenVINO Runtime with the ``FP16`` as well. +Running the benchmark application +--------------------------------- + +The benchmark_app includes a lot of device-specific options, but the primary usage is as simple +as: + +.. code-block:: sh + + benchmark_app -m -d -i -.. raw:: html -

Internal Inference Performance Counters and Execution Graphs

+Each of the :doc:`OpenVINO supported devices <../compatibility-and-support/supported-devices>` +offers performance settings that contain command-line equivalents in the Benchmark app. -More detailed insights into inference performance breakdown can be achieved with device-specific performance counters and/or execution graphs. +While these settings provide really low-level control for the optimal model performance on a +*specific* device, it is recommended to always start performance evaluation with the +:doc:`OpenVINO High-Level Performance Hints <../../openvino-workflow/running-inference/optimize-inference/high-level-performance-hints>` +first, like so: + +.. code-block:: sh + + # for throughput prioritization + benchmark_app -hint tput -m -d + # for latency prioritization + benchmark_app -hint latency -m -d + + +Internal Inference Performance Counters and Execution Graphs +------------------------------------------------------------- + +More detailed insights into inference performance breakdown can be achieved with device-specific +performance counters and/or execution graphs. Both :doc:`C++ and Python <../../learn-openvino/openvino-samples/benchmark-tool>` -versions of the *benchmark_app* support a ``-pc`` command-line parameter that outputs internal execution breakdown. +versions of the benchmark_app support a ``-pc`` command-line parameter that outputs an internal +execution breakdown. -For example, the table shown below is part of performance counters for quantized -`TensorFlow implementation of ResNet-50 `__ -model inference on :doc:`CPU Plugin <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`. -Keep in mind that since the device is CPU, the ``realTime`` wall clock and the ``cpu`` time layers are the same. -Information about layer precision is also stored in the performance counters. 
+For example, the table below is part of performance counters for +:doc:`CPU inference <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>` +of a `TensorFlow implementation of ResNet-50 `__. +Keep in mind that since the device is CPU, the ``realTime`` wall clock and the ``cpu`` time +layers are the same. Information about layer precision is also stored in the performance +counters. =========================================================== ============= ============== ===================== ================= ============== @@ -136,39 +213,63 @@ Information about layer precision is also stored in the performance counters. | The ``execStatus`` column of the table includes the following possible values: | - ``EXECUTED`` - the layer was executed by standalone primitive. -| - ``NOT_RUN`` - the layer was not executed by standalone primitive or was fused with another operation and executed in another layer primitive. +| - ``NOT_RUN`` - the layer was not executed by standalone primitive or was fused with + another operation and executed in another layer primitive. | -| The ``execType`` column of the table includes inference primitives with specific suffixes. The layers could have the following marks: -| - The ``I8`` suffix is for layers that had 8-bit data type input and were computed in 8-bit precision. +| The ``execType`` column of the table includes inference primitives with specific suffixes. + The layers could have the following marks: +| - The ``I8`` suffix is for layers that had 8-bit data type input and were computed in + 8-bit precision. | - The ``FP32`` suffix is for layers computed in 32-bit precision. | -| All ``Convolution`` layers are executed in ``int8`` precision. The rest of the layers are fused into Convolutions using post-operation optimization, - as described in :doc:`CPU Device <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`.
This contains layer names - (as seen in OpenVINO IR), type of the layer, and execution statistics. +| All ``Convolution`` layers are executed in ``int8`` precision. The rest of the layers are + fused into Convolutions using post-operation optimization, as described in + :doc:`CPU Device <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`. + This contains layer names (as seen in OpenVINO IR), type of the layer, and execution + statistics. -Both *benchmark_app* versions also support the ``exec_graph_path`` command-line option. It requires OpenVINO to output the same execution -statistics per layer, but in the form of plugin-specific `Netron-viewable `__ graph to the specified file. +Both *benchmark_app* versions also support the ``exec_graph_path`` command-line option. +It requires OpenVINO to output the same execution statistics per layer, but in the form of a +plugin-specific `Netron-viewable `__ graph to the specified file. + +Especially when performance-debugging +:doc:`latency <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency>`, +note that the counters do not reflect the time spent in the ``plugin/device/driver/etc`` queues. +If the sum of the counters is too different from the latency of an inference request, consider +testing with fewer inference requests. For example, running a single +:doc:`OpenVINO stream <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>` +with multiple requests would produce nearly identical counters as running a single inference +request, while the actual latency can be quite different. + +Lastly, the performance statistics with both performance counters and execution graphs are +averaged, so such data for the +:doc:`inputs of dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>` +should be measured carefully, preferably by isolating the specific shape and executing multiple +times in a loop, to gather reliable data.
+ +Use ITT to Get Performance Insights +-------------------------------------- + +In general, OpenVINO and its individual plugins are heavily instrumented with Intel® +Instrumentation and Tracing Technology (ITT). Therefore, you can also compile OpenVINO from the +source code with ITT enabled and use tools like +`Intel® VTune™ Profiler `__ +to get detailed inference performance breakdown and additional insights in the application-level +performance on the timeline view. + + +OpenVINO benchmarking (LLM) ++++++++++++++++++++++++++++++++ + +Large Language Models require a different benchmarking approach to static models. A detailed +description will be added soon. -Especially when performance-debugging the :doc:`latency <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency>`, note that the counters -do not reflect the time spent in the ``plugin/device/driver/etc`` queues. If the sum of the counters is too different from the latency -of an inference request, consider testing with less inference requests. For example, running single -:doc:`OpenVINO stream <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>` with multiple requests would produce nearly identical -counters as running a single inference request, while the actual latency can be quite different. -Lastly, the performance statistics with both performance counters and execution graphs are averaged, -so such data for the :doc:`inputs of dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>` should be measured carefully, -preferably by isolating the specific shape and executing multiple times in a loop, to gather reliable data. -.. raw:: html -

Use ITT to Get Performance Insights

-In general, OpenVINO and its individual plugins are heavily instrumented with Intel® Instrumentation and Tracing Technology (ITT). -Therefore, you can also compile OpenVINO from the source code with ITT enabled and use tools like -`Intel® VTune™ Profiler `__ to get detailed inference performance breakdown and additional -insights in the application-level performance on the timeline view. diff --git a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst index 8b93e6a1aebe7b..3162bae7254704 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst @@ -4,9 +4,10 @@ Model Accuracy The following two tables present the absolute accuracy drop calculated as the accuracy difference -between OV-accuracy and the original frame work accuracy for FP32, and the same for INT8, BF16 and -FP16 representations of a model on three platform architectures. The third table presents the GenAI model accuracies as absolute accuracy values. Please also refer to notes below -the table for more information. +between OV-accuracy and the original framework accuracy for FP32, and the same for INT8, BF16, +and FP16 representations of a model on three platform architectures. The third table presents +the GenAI model accuracies as absolute accuracy values. Refer to notes below the table for more +information. 
* A - Intel® Core™ i9-9000K (AVX2), INT8 and FP32 * B - Intel® Xeon® 6338, (VNNI), INT8 and FP32 diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index c55d3f44451f1c..4bf0b3a0acb19a 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -58,11 +58,11 @@ Performance Information F.A.Q. - Hugginface - Causal Decoder-only - 2048 - * - `Llama-2-7b-chat `__ + * - `Llama-2-7b-chat `__ - Meta AI - Auto regressive language - 4096 - * - `Llama-3-8b `__ + * - `Llama-3-8b `__ - Meta AI - Auto regressive language - 8192 @@ -74,7 +74,7 @@ Performance Information F.A.Q. - Huggingface - Auto regressive language - 4096 - * - `Stable-Diffusion-V1-5 `__ + * - `Stable-Diffusion-V1-5 `__ - Hugginface - Latent Diffusion Model - 77 @@ -118,7 +118,7 @@ Performance Information F.A.Q. - YOLO V5 Medium - object detection - 640x640 - * - `yolov8n `__ + * - `yolov8n `__ - Yolov8nano - object detection - 608x608 diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 4bd0b5d32c0f0e..6685a4325d57fe 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -943,7 +943,7 @@ Previous 2024 releases deployed in an arbitrary path without any code changes. * KServe REST API support has been extended to properly handle the string format in JSON body, just like the binary format compatible with NVIDIA Triton™. - * `A demo showcasing a full RAG algorithm `__ + * `A demo showcasing a full RAG algorithm `__ fully delegated to the model server has been added. 
**Neural Network Compression Framework** @@ -1000,7 +1000,7 @@ Previous 2024 releases * `RMBG background removal `__ * `AnimateAnyone: pose guided image to video generation `__ * `LLaVA-Next visual-language assistant `__ - * `TripoSR: single image 3d reconstruction `__ + * `TripoSR: single image 3d reconstruction `__ * `RAG system with OpenVINO and LangChain `__ *Known Issues* @@ -1309,7 +1309,7 @@ Discontinued in 2024 * `Accuracy Checker `__. * `Post-Training Optimization Tool `__ (POT). Neural Network Compression Framework (NNCF) should be used instead. - * A `Git patch `__ + * A `Git patch `__ for NNCF integration with `huggingface/transformers `__. The recommended approach is to use `huggingface/optimum-intel `__ for applying NNCF optimization on top of models from Hugging Face. @@ -1360,25 +1360,25 @@ Deprecated and to be removed in the future * See alternative: `PaddleOCR with OpenVINO™ `__, * See alternative: `Handwritten Text Recognition Demo `__ - * `Image In-painting with OpenVINO™ `__ + * `Image In-painting with OpenVINO™ `__ * See alternative: `Image Inpainting Python Demo `__ - * `Interactive Machine Translation with OpenVINO `__ + * `Interactive Machine Translation with OpenVINO `__ * See alternative: `Machine Translation Python* Demo `__ - * `Open Model Zoo Tools Tutorial `__ + * `Open Model Zoo Tools Tutorial `__ * No alternatives, demonstrates deprecated tools. 
- * `Super Resolution with OpenVINO™ `__ + * `Super Resolution with OpenVINO™ `__ * See alternative: `Super Resolution with PaddleGAN and OpenVINO `__ * See alternative: `Image Processing C++ Demo `__ - * `Image Colorization with OpenVINO Tutorial `__ - * `Interactive Question Answering with OpenVINO™ `__ + * `Image Colorization with OpenVINO Tutorial `__ + * `Interactive Question Answering with OpenVINO™ `__ * See alternative: `BERT Question Answering Embedding Python* Demo `__ * See alternative: `BERT Question Answering Python* Demo `__ @@ -1387,37 +1387,37 @@ Deprecated and to be removed in the future * See alternative: `Security Barrier Camera C++ Demo `__ - * `The attention center model with OpenVINO™ `_ - * `Image Generation with DeciDiffusion `_ - * `Image generation with DeepFloyd IF and OpenVINO™ `_ - * `Depth estimation using VI-depth with OpenVINO™ `_ + * `The attention center model with OpenVINO™ `_ + * `Image Generation with DeciDiffusion `_ + * `Image generation with DeepFloyd IF and OpenVINO™ `_ + * `Depth estimation using VI-depth with OpenVINO™ `_ * `Instruction following using Databricks Dolly 2.0 and OpenVINO™ `_ * See alternative: `LLM Instruction-following pipeline with OpenVINO `__ - * `Image generation with FastComposer and OpenVINO™ `__ + * `Image generation with FastComposer and OpenVINO™ `__ * `Video Subtitle Generation with OpenAI Whisper `__ * See alternative: `Automatic speech recognition using Distil-Whisper and OpenVINO `__ - * `Introduction to Performance Tricks in OpenVINO™ `__ - * `Speaker Diarization with OpenVINO™ `__ - * `Subject-driven image generation and editing using BLIP Diffusion and OpenVINO `__ - * `Text Prediction with OpenVINO™ `__ - * `Training to Deployment with TensorFlow and OpenVINO™ `__ - * `Speech to Text with OpenVINO™ `__ - * `Convert and Optimize YOLOv7 with OpenVINO™ `__ - * `Quantize Data2Vec Speech Recognition Model using NNCF PTQ API `__ + * `Introduction to Performance Tricks in OpenVINO™ `__ + * 
`Speaker Diarization with OpenVINO™ `__ + * `Subject-driven image generation and editing using BLIP Diffusion and OpenVINO `__ + * `Text Prediction with OpenVINO™ `__ + * `Training to Deployment with TensorFlow and OpenVINO™ `__ + * `Speech to Text with OpenVINO™ `__ + * `Convert and Optimize YOLOv7 with OpenVINO™ `__ + * `Quantize Data2Vec Speech Recognition Model using NNCF PTQ API `__ * See alternative: `Quantize Speech Recognition Models with accuracy control using NNCF PTQ API `__ - * `Semantic segmentation with LRASPP MobileNet v3 and OpenVINO `__ - * `Video Recognition using SlowFast and OpenVINO™ `__ + * `Semantic segmentation with LRASPP MobileNet v3 and OpenVINO `__ + * `Video Recognition using SlowFast and OpenVINO™ `__ * See alternative: `Live Action Recognition with OpenVINO™ `__ - * `Semantic Segmentation with OpenVINO™ using Segmenter `__ - * `Programming Language Classification with OpenVINO `__ + * `Semantic Segmentation with OpenVINO™ using Segmenter `__ + * `Programming Language Classification with OpenVINO `__ * `Stable Diffusion Text-to-Image Demo `__ * See alternative: `Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware `__ @@ -1426,10 +1426,10 @@ Deprecated and to be removed in the future * See alternative: `Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware `__ - * `Image generation with Segmind Stable Diffusion 1B (SSD-1B) model and OpenVINO `__ - * `Data Preparation for 2D Medical Imaging `__ - * `Train a Kidney Segmentation Model with MONAI and PyTorch Lightning `__ - * `Live Inference and Benchmark CT-scan Data with OpenVINO™ `__ + * `Image generation with Segmind Stable Diffusion 1B (SSD-1B) model and OpenVINO `__ + * `Data Preparation for 2D Medical Imaging `__ + * `Train a Kidney Segmentation Model with MONAI and PyTorch Lightning `__ + * `Live Inference and Benchmark CT-scan Data with OpenVINO™ `__ * See alternative: `Quantize a Segmentation Model and Show Live Inference `__ @@ 
-1458,7 +1458,7 @@ are available on request. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at -`www.intel.com `__ +`www.intel.com `__ or from the OEM or retailer. No computer system can be absolutely secure. diff --git a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst index 50ac3a3350658e..a12cacf8402953 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst @@ -114,7 +114,7 @@ Operating systems and developer environment Build environment components: - * Python 3.8-3.12 + * Python 3.9-3.12 * `Intel® HD Graphics Driver `__ required for inference on GPU * GNU Compiler Collection and CMake are needed for building from source: @@ -128,11 +128,16 @@ Operating systems and developer environment .. tab-item:: Windows 10 and 11 + OpenVINO Runtime requires certain C++ libraries to operate. To execute ready-made apps, + the libraries distributed by `Visual Studio redistributable package `__ + are suggested. For development and compilation of OpenVINO-integrated apps, the build + environment components are required instead. + Build environment components: - * `Microsoft Visual Studio 2019 `__ + * `Microsoft Visual Studio 2019 or later `__ * `CMake `__ 3.16 or higher - * `Python `__ 3.8-3.12 + * `Python `__ 3.9-3.12 * `Intel® HD Graphics Driver `__ required for inference on GPU @@ -144,7 +149,7 @@ Operating systems and developer environment * `Xcode `__ 10.3 * `CMake `__ 3.13 or higher - * `Python `__ 3.8-3.12 + * `Python `__ 3.9-3.12 .. tab-item:: DL framework versions: @@ -162,7 +167,193 @@ Operating systems and developer environment OpenVINO Python binaries are built with and redistribute oneTBB libraries. 
+OpenVINO Distributions +###################### + +Different OpenVINO distributions may support slightly different sets of features. +Read installation guides for particular distributions for more details. +Refer to the :doc:`OpenVINO Release Policy <../../../about-openvino/release-notes-openvino/release-policy>` +to learn more about the release types. + + +.. tab-set:: + + .. tab-item:: Archive + :name: archive-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :name: archive-lnx-sysreq + + * `CMake 3.13 or higher, 64-bit `__ + * `Python 3.9 - 3.12, 64-bit `__ + * GCC: + + .. tab-set:: + + .. tab-item:: Ubuntu + :sync: ubuntu + + * GCC 9.3.0 (for Ubuntu 20.04), GCC 11.3.0 (for Ubuntu 22.04) or GCC 13.2.0 (for Ubuntu 24.04) + + .. tab-item:: RHEL 8 + :sync: rhel-8 + + * GCC 8.4.1 + + .. tab-item:: CentOS 7 + :sync: centos-7 + + * GCC 8.3.1 + + Use the following instructions to install it: + + Install GCC 8.3.1 via devtoolset-8 + + .. code-block:: sh + + sudo yum update -y && sudo yum install -y centos-release-scl epel-release + sudo yum install -y devtoolset-8 + + Enable devtoolset-8 and check current gcc version + + .. code-block:: sh + + source /opt/rh/devtoolset-8/enable + gcc -v + + .. tab-item:: macOS + :name: archive-mac-sysreq + + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). + * `Python 3.9 - 3.12 `__ (choose 3.9 - 3.12). Install and add to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory + * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) + + ..
tab-item:: Windows + :name: archive-win-sysreq + + * `C++ libraries (included in Visual Studio redistributable) `__ (a core dependency for OpenVINO Runtime) + * `Microsoft Visual Studio 2019 or later `__ (for development and app compilation with OpenVINO) + * `CMake 3.14 or higher, 64-bit `__ (optional, only required for building sample applications) + * `Python 3.9 - 3.12, 64-bit `__ + + .. note:: + + To install Microsoft Visual Studio, follow the `Microsoft Visual Studio installation guide `__. + You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. + + .. note:: + + You can use either ``cmake.msi``, which is the installation wizard, or ``cmake.zip``, in which case you have to go into the ``bin`` folder and manually add its path to the environment variables. + + .. important:: + + When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `__ to your ``PATH`` environment variable. + + .. tab-item:: APT + :name: apt-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + * `CMake 3.13 or higher, 64-bit `__ + * GCC 9.3.0 (for Ubuntu 20.04), GCC 11.3.0 (for Ubuntu 22.04) or GCC 13.2.0 (for Ubuntu 24.04) + * `Python 3.9 - 3.12, 64-bit `__ + + .. tab-item:: Homebrew + :name: homebrew-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + * `Homebrew `_ + * `CMake 3.13 or higher, 64-bit `__ + * GCC 9.3.0 (for Ubuntu 20.04), GCC 11.3.0 (for Ubuntu 22.04) or GCC 13.2.0 (for Ubuntu 24.04) + * `Python 3.9 - 3.12, 64-bit `__ + + .. tab-item:: macOS + :sync: macos + + * `Homebrew `_ + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). + * `Python 3.9 - 3.12 `__ . Install and add it to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it.
+ * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) + + .. tab-item:: npm + :name: npm-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + All x86_64 / arm64 architectures are supported. + + * `Node.js version 21.0.0 and higher `__ + + .. tab-item:: macOS + :sync: macos + + All x86_64 / arm64 architectures are supported, however, only for CPU inference. + + * `Node.js version 21.0.0 and higher `__ + + .. tab-item:: Windows + :sync: Windows + + All x86_64 architectures are supported. Windows ARM is not supported. + + * `Node.js version 21.0.0 and higher `__ + + .. tab-item:: YUM + :name: yum-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + OpenVINO RPM packages are compatible with and can be run on the following operating systems: + + * RHEL 8.2 and higher + * Amazon Linux 2022 and 2023 + * Rocky Linux 8.7, 8.8 and 9.2-9.3 + * Alma Linux 8.7, 8.8 and 9.2-9.4 + * Oracle Linux 8.7, 8.8 and 9.2-9.4 + * Fedora 29 and higher up to 41 + * OpenEuler 20.03, 22.03, 23.03 and 24.03 + * Anolis OS 8.6 and 8.8 + * CentOS Stream 8 and 9 + + Software: + + * `CMake 3.13 or higher, 64-bit `_ + * GCC 8.4.1 + * `Python 3.9 - 3.12, 64-bit `_ + + .. tab-item:: ZYPPER + :name: zypper-sysreq + + .. tab-set:: + + .. tab-item:: Linux + :sync: linux + + OpenVINO RPM packages are compatible with and can be run on openSUSE Tumbleweed only. + + Software: + + * `CMake 3.13 or higher, 64-bit `_ + * GCC 8.2.0 + * `Python 3.9 - 3.12, 64-bit `_ The claims stated here may not apply to all use cases and setups. See -:doc:`Legal notices and terms of use <../additional-resources/terms-of-use>` for more information. \ No newline at end of file +:doc:`Legal notices and terms of use <../additional-resources/terms-of-use>` for more information. 
diff --git a/docs/articles_en/documentation/legacy-features.rst b/docs/articles_en/documentation/legacy-features.rst index f859a3a4572f88..2457d28cf24c15 100644 --- a/docs/articles_en/documentation/legacy-features.rst +++ b/docs/articles_en/documentation/legacy-features.rst @@ -96,7 +96,7 @@ Discontinued: | *New solution:* API 2.0 launched in OpenVINO 2022.1 | *Old solution:* discontinued with OpenVINO 2024.0 - | `The last version supporting API 1.0 `__ + | `2023.2 is the last version supporting API 1.0 `__ .. dropdown:: Compile tool diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst index b5d3c08b39f480..fb9f41c755d4fb 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats.rst @@ -120,7 +120,7 @@ Here are code examples of how to use these methods with different model formats: For more details on conversion, refer to the :doc:`guide <[legacy]-supported-model-formats/[legacy]-convert-tensorflow>` - and an example `tutorial `__ + and an example `tutorial `__ on this topic. * The ``read_model()`` and ``compile_model()`` methods: @@ -592,7 +592,7 @@ to OpenVINO IR or ONNX before running inference should be considered the default OpenVINO versions of 2023 are mostly compatible with the old instructions, through a deprecated MO tool, installed with the deprecated OpenVINO Developer Tools package. - `OpenVINO 2023.0 `__ is the last + `OpenVINO 2023.0 `__ is the last release officially supporting the MO conversion process for the legacy formats. 
diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst index 711a060b7467b8..7880b261c80b81 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-faster-r-cnn.rst @@ -14,7 +14,7 @@ Converting an ONNX Faster R-CNN Model The instructions below are applicable **only** to the Faster R-CNN model converted to the ONNX file format from the `maskrcnn-benchmark model `__: -1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). +1. Download the pretrained model file from `onnx/models `__ (commit-SHA: 8883e49e68de7b43e263d56b9ed156dfa1e03117). 2. 
Generate the Intermediate Representation of the model, by changing your current working directory to the model conversion API installation directory, and running model conversion with the following parameters: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst index 84392e92e620d2..4c10c941c7fb47 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-onnx-gpt-2.rst @@ -12,7 +12,7 @@ Converting an ONNX GPT-2 Model This guide describes a deprecated conversion method. The guide on the new and recommended method can be found in the :doc:`Python tutorials <../../../../../../learn-openvino/interactive-tutorials-python>`. -`Public pre-trained GPT-2 model `__ is a large +`Public pre-trained GPT-2 model `__ is a large transformer-based language model with a simple objective: predict the next word, given all of the previous words within some text. 
Downloading the Pre-Trained Base GPT-2 Model diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst index de3af8ce5175f0..f1ee885dae0b26 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-quartz-net.rst @@ -20,7 +20,7 @@ Downloading the Pre-trained QuartzNet Model To download the pre-trained model, refer to the `NeMo Speech Models Catalog `__. Here are the instructions on how to obtain QuartzNet in ONNX format. -1. Install the NeMo toolkit, using the `instructions `__. +1. Install the NeMo toolkit, using the `instructions `__. 2. 
Run the following code: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst index 4f33e510a40267..ad646568aed598 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-conversion-tutorials/convert-pytorch-rnn-t.rst @@ -44,7 +44,7 @@ For UNIX-like systems, you can use ``wget``: The link was taken from ``setup.sh`` in the ``speech_recoginitin/rnnt`` subfolder. You will get exactly the same weights as -if you were following the `guide `__. +if you were following the `guide `__. **Step 4**. 
Install required Python packages: diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst index 955d5418d37270..2bcb6fde9b833b 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-supported-model-formats/[legacy]-convert-tensorflow.rst @@ -59,7 +59,7 @@ To convert such TensorFlow model, run the `mo` script with a path to the MetaGra 3. **SavedModel format**. In this case, a model consists of a special directory with a ``.pb`` file -and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. +and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. To convert such TensorFlow model, run the ``mo`` script with a path to the SavedModel directory: .. 
code-block:: sh diff --git a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst index fc78b12640771a..3d2365f45ffe3b 100644 --- a/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst +++ b/docs/articles_en/documentation/legacy-features/transition-legacy-conversion-api/legacy-model-optimizer-extensibility.rst @@ -160,7 +160,7 @@ It is important to mention that sometimes it seems like transformation cannot be because the actual values of inputs or shapes are needed. In fact, manipulations of shapes or values can be implemented using operations that are added to the graph. Consider the ``extensions/front/onnx/flattenONNX_to_reshape.py`` transformation, which replaces an ONNX -`Flatten `__ operation with a sub-graph of operations performing +`Flatten `__ operation with a sub-graph of operations performing the following (when ``axis`` is not equal to 0 and 1): 1. Calculate a shape of the ``Flatten`` input tensor, using the :doc:`ShapeOf <../../openvino-ir-format/operation-sets/operation-specs/shape/shape-of-3>` operation. diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst index 2d5598a5eb8e9d..3959ebefb09a4a 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst @@ -580,7 +580,7 @@ Building OpenVINO™ Security Add-on depends on OpenVINO™ Model Server docker 1. Download the `OpenVINO™ Model Server software `__ -2. Build the `OpenVINO™ Model Server Docker images `__ +2. Build the `OpenVINO™ Model Server Docker images `__ .. 
code-block:: sh diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst index a7a81acd9ba3a7..8a5bd91f9c1b7b 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-training-extensions.rst @@ -32,9 +32,9 @@ If the results are unsatisfactory, add datasets and perform the same steps, star OpenVINO Training Extensions Components ####################################### -* `OpenVINO Training Extensions API `__ +* `OpenVINO Training Extensions API `__ * `OpenVINO Training Extensions CLI `__ -* `OpenVINO Training Extensions Algorithms `__ +* `OpenVINO Training Extensions Algorithms `__ Tutorials ######### diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst index 6ba9e0a9b60f52..9451fabd6219d8 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst @@ -35,7 +35,7 @@ The goal of Low Precision Transformations (LPT) is to transform a quantized mode As result, operation input tensor precisions will be changed from original to low precision and operations can be inferred by OpenVINO™ plugin in low precision. -For a more detailed description on how to quantize a model, see the `Low precision tools <#low-precision-tools>`__ section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in `this whitepaper `__. 
+For a more detailed description on how to quantize a model, see the `Low precision tools <#low-precision-tools>`__ section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in `this whitepaper `__. Input model requirements ######################## diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst index a3028755299b45..ed77c97e21b0ee 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets.rst @@ -10,6 +10,7 @@ Available Operation Sets :maxdepth: 1 :hidden: + available-opsets/opset15 available-opsets/opset14 available-opsets/opset13 available-opsets/opset12 @@ -34,7 +35,9 @@ This topic provides a complete list of available sets of operations supported in :header-rows: 1 * - OpenVINO™ Version - - Actual Operations Set + - Operation Set + * - 2024.5 + - :doc:`opset15 ` * - 2024.0 - :doc:`opset14 ` * - 2023.2 @@ -71,4 +74,3 @@ See Also * :doc:`Operation Sets in OpenVINO <../operation-sets>` * :doc:`OpenVINO IR format <../../openvino-ir-format>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst index 55b29d30fa8502..1ce95152c20c90 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset1.rst @@ -66,7 +66,6 @@ Table of Contents * :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence 
<../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -122,5 +121,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst index 04f151a3e26b84..56397ecd852305 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset10.rst @@ -107,7 +107,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -192,4 +192,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst index 6ae5882e2e4fd7..8a918bde06ebb1 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst +++ 
b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset11.rst @@ -107,7 +107,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -192,4 +192,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst index 6e6ddd61ba1e0e..06cfaa7f8f6a20 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset12.rst @@ -108,7 +108,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -193,4 +193,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * 
:doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst index 87396dfd542203..6bcea43cdeb103 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset13.rst @@ -113,7 +113,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -201,4 +201,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst index 05131a907d15b4..eb967a6fc94e11 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst @@ -115,7 +115,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* 
:doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-14>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst new file mode 100644 index 00000000000000..12368033a1fa20 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset15.rst @@ -0,0 +1,216 @@ +opset15 +======= + + +.. meta:: + :description: Explore the examples of operation instances expressed as IR + XML snippets in the opset15 operation set, supported in OpenVINO™ + toolkit. + +This specification document describes the ``opset15`` operation set supported in OpenVINO™. +Support for each particular operation from the list below depends on the capabilities of an inference plugin +and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR xml +snippets. The semantics match corresponding OpenVINO operation classes declared in ``namespace opset15``. 
+ + +Table of Contents +################## + +* :doc:`Abs <../operation-specs/arithmetic/abs-1>` +* :doc:`Acos <../operation-specs/arithmetic/acos-1>` +* :doc:`Acosh <../operation-specs/arithmetic/acosh-3>` +* :doc:`AdaptiveAvgPool <../operation-specs/pooling/adaptive-avg-pool-8>` +* :doc:`AdaptiveMaxPool <../operation-specs/pooling/adaptive-max-pool-8>` +* :doc:`Add <../operation-specs/arithmetic/add-1>` +* :doc:`Asin <../operation-specs/arithmetic/asin-1>` +* :doc:`Asinh <../operation-specs/arithmetic/asinh-3>` +* :doc:`Assign <../operation-specs/infrastructure/assign-3>` +* :doc:`Atan <../operation-specs/arithmetic/atan-1>` +* :doc:`Atanh <../operation-specs/arithmetic/atanh-3>` +* :doc:`AvgPool <../operation-specs/pooling/avg-pool-14>` +* :doc:`BatchNormInference <../operation-specs/normalization/batch-norm-inference-5>` +* :doc:`BatchToSpace <../operation-specs/movement/batch-to-space-2>` +* :doc:`BinaryConvolution <../operation-specs/convolution/binary-convolution-1>` +* :doc:`BitwiseAnd <../operation-specs/bitwise/bitwise-and-13>` +* :doc:`BitwiseOr <../operation-specs/bitwise/bitwise-or-13>` +* :doc:`BitwiseXor <../operation-specs/bitwise/bitwise-xor-13>` +* :doc:`BitwiseLeftShift <../operation-specs/bitwise/bitwise-left-shift-15>` +* :doc:`BitwiseRightShift <../operation-specs/bitwise/bitwise-right-shift-15>` +* :doc:`BitwiseNot <../operation-specs/bitwise/bitwise-not-13>` +* :doc:`Broadcast <../operation-specs/movement/broadcast-3>` +* :doc:`Bucketize <../operation-specs/condition/bucketize-3>` +* :doc:`CTCGreedyDecoder <../operation-specs/sequence/ctc-greedy-decoder-1>` +* :doc:`CTCGreedyDecoderSeqLen <../operation-specs/sequence/ctc-greedy-decoder-seq-len-6>` +* :doc:`CTCLoss <../operation-specs/sequence/ctc-loss-4>` +* :doc:`Ceiling <../operation-specs/arithmetic/ceiling-1>` +* :doc:`Clamp <../operation-specs/activation/clamp-1>` +* :doc:`Col2Im <../operation-specs/movement/col2im-15>` +* :doc:`Concat <../operation-specs/movement/concat-1>` +* 
:doc:`Constant <../operation-specs/infrastructure/constant-1>` +* :doc:`Convert <../operation-specs/type/convert-1>` +* :doc:`ConvertLike <../operation-specs/type/convert-like-1>` +* :doc:`ConvertPromoteTypes <../operation-specs/type/convert-promote-types-14>` +* :doc:`Convolution <../operation-specs/convolution/convolution-1>` +* :doc:`ConvolutionBackpropData <../operation-specs/convolution/convolution-backprop-data-1>` +* :doc:`Cos <../operation-specs/arithmetic/cos-1>` +* :doc:`Cosh <../operation-specs/arithmetic/cosh-1>` +* :doc:`CumSum <../operation-specs/arithmetic/cumsum-3>` +* :doc:`DeformableConvolution <../operation-specs/convolution/deformable-convolution-8>` +* :doc:`DeformablePSROIPooling <../operation-specs/detection/deformable-psroi-pooling-1>` +* :doc:`DepthToSpace <../operation-specs/movement/depth-to-space-1>` +* :doc:`DetectionOutput <../operation-specs/detection/detectionoutput-8>` +* :doc:`DFT <../operation-specs/signals/dft-7>` +* :doc:`Divide <../operation-specs/arithmetic/divide-1>` +* :doc:`Einsum <../operation-specs/matrix/einsum-7>` +* :doc:`Elu <../operation-specs/activation/elu-1>` +* :doc:`EmbeddingBagOffsetsSum <../operation-specs/sparse/embedding-bag-offsets-sum-3>` +* :doc:`EmbeddingBagOffsets <../operation-specs/sparse/embedding-bag-offsets-15>` +* :doc:`EmbeddingBagPackedSum <../operation-specs/sparse/embedding-bag-packed-sum-3>` +* :doc:`EmbeddingBagPacked <../operation-specs/sparse/embedding-bag-packed-15>` +* :doc:`EmbeddingSegmentsSum <../operation-specs/sparse/embedding-segments-sum-3>` +* :doc:`Equal <../operation-specs/comparison/equal-1>` +* :doc:`Erf <../operation-specs/arithmetic/erf-1>` +* :doc:`Exp <../operation-specs/activation/exp-1>` +* :doc:`ExperimentalDetectronDetectionOutput_6 <../operation-specs/detection/experimental-detectron-detection-output-6>` +* :doc:`ExperimentalDetectronGenerateProposalsSingleImage_6 <../operation-specs/detection/experimental-detectron-generate-proposals-single-image-6>` +* 
:doc:`ExperimentalDetectronPriorGridGenerator_6 <../operation-specs/detection/experimental-detectron-prior-grid-generator-6>` +* :doc:`ExperimentalDetectronROIFeatureExtractor_6 <../operation-specs/detection/experimental-detectron-roi-feature-extractor-6>` +* :doc:`ExperimentalDetectronTopKROIs_6 <../operation-specs/sort/experimental-detectron-top-krois-6>` +* :doc:`ExtractImagePatches <../operation-specs/movement/extract-image-patches-3>` +* :doc:`Eye <../operation-specs/generation/eye-9>` +* :doc:`FakeConvert <../operation-specs/quantization/fake-convert-13>` +* :doc:`FakeQuantize <../operation-specs/quantization/fake-quantize-1>` +* :doc:`Floor <../operation-specs/arithmetic/floor-1>` +* :doc:`FloorMod <../operation-specs/arithmetic/floormod-1>` +* :doc:`Gather <../operation-specs/movement/gather-8>` +* :doc:`GatherElements <../operation-specs/movement/gather-elements-6>` +* :doc:`GatherND <../operation-specs/movement/gather-nd-8>` +* :doc:`GatherTree <../operation-specs/movement/gather-tree-1>` +* :doc:`Gelu <../operation-specs/activation/gelu-7>` +* :doc:`GenerateProposals <../operation-specs/detection/generate-proposals-9>` +* :doc:`Greater <../operation-specs/comparison/greater-1>` +* :doc:`GreaterEqual <../operation-specs/comparison/greater-equal-1>` +* :doc:`GridSample <../operation-specs/image/grid-sample-9>` +* :doc:`GRN <../operation-specs/normalization/grn-1>` +* :doc:`GroupConvolution <../operation-specs/convolution/group-convolution-1>` +* :doc:`GroupConvolutionBackpropData <../operation-specs/convolution/group-convolution-backprop-data-1>` +* :doc:`GroupNormalization <../operation-specs/normalization/group-normalization-12>` +* :doc:`GRUCell <../operation-specs/sequence/gru-cell-3>` +* :doc:`GRUSequence <../operation-specs/sequence/gru-sequence-5>` +* :doc:`HardSigmoid <../operation-specs/activation/hard-sigmoid-1>` +* :doc:`HSigmoid <../operation-specs/activation/hsigmoid-5>` +* :doc:`HSwish <../operation-specs/activation/hswish-4>` +* :doc:`IDFT 
<../operation-specs/signals/idft-7>` +* :doc:`I420toBGR <../operation-specs/image/i420-to-bgr-8>` +* :doc:`I420toRGB <../operation-specs/image/i420-to-rgb-8>` +* :doc:`If <../operation-specs/condition/if-8>` +* :doc:`Interpolate <../operation-specs/image/interpolate-11>` +* :doc:`Inverse <../operation-specs/matrix/inverse-14>` +* :doc:`IRDFT <../operation-specs/signals/irdft-9>` +* :doc:`IsInf <../operation-specs/comparison/isinf-10>` +* :doc:`IsNaN <../operation-specs/comparison/isnan-10>` +* :doc:`Less <../operation-specs/comparison/less-1>` +* :doc:`LessEqual <../operation-specs/comparison/lessequal-1>` +* :doc:`Log <../operation-specs/arithmetic/log-1>` +* :doc:`LogicalAnd <../operation-specs/logical/logical-and-1>` +* :doc:`LogicalNot <../operation-specs/logical/logical-not-1>` +* :doc:`LogicalOr <../operation-specs/logical/logical-or-1>` +* :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` +* :doc:`LogSoftmax <../operation-specs/activation/log-soft-max-5>` +* :doc:`Loop <../operation-specs/infrastructure/loop-5>` +* :doc:`LRN <../operation-specs/normalization/lrn-1>` +* :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` +* :doc:`MatMul <../operation-specs/matrix/matmul-1>` +* :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` +* :doc:`MaxPool <../operation-specs/pooling/max-pool-14>` +* :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` +* :doc:`Minimum <../operation-specs/arithmetic/minimum-1>` +* :doc:`Mish <../operation-specs/activation/mish-4>` +* :doc:`Mod <../operation-specs/arithmetic/mod-1>` +* :doc:`MVN <../operation-specs/normalization/mvn-6>` +* :doc:`MulticlassNMS <../operation-specs/sort/multiclass-non-max-suppression-9>` +* :doc:`Multinomial <../operation-specs/generation/multinomial-13>` +* :doc:`Multiply <../operation-specs/arithmetic/multiply-1>` +* :doc:`Negative <../operation-specs/arithmetic/negative-1>` +* :doc:`NMSRotated 
<../operation-specs/sort/nms-rotated-13>` +* :doc:`NonMaxSuppression <../operation-specs/sort/non-max-suppression-9>` +* :doc:`NonZero <../operation-specs/condition/nonzero-3>` +* :doc:`NormalizeL2 <../operation-specs/normalization/normalize-l2-1>` +* :doc:`NotEqual <../operation-specs/comparison/notequal-1>` +* :doc:`NV12toBGR <../operation-specs/image/nv12-to-bgr-8>` +* :doc:`NV12toRGB <../operation-specs/image/nv12-to-rgb-8>` +* :doc:`OneHot <../operation-specs/sequence/one-hot-1>` +* :doc:`Pad <../operation-specs/movement/pad-12>` +* :doc:`Parameter <../operation-specs/infrastructure/parameter-1>` +* :doc:`Power <../operation-specs/arithmetic/power-1>` +* :doc:`PReLU <../operation-specs/activation/prelu-1>` +* :doc:`PriorBoxClustered <../operation-specs/detection/prior-box-clustered-1>` +* :doc:`PriorBox <../operation-specs/detection/prior-box-8>` +* :doc:`Proposal <../operation-specs/detection/proposal-4>` +* :doc:`PSROIPooling <../operation-specs/detection/psroi-pooling-1>` +* :doc:`RandomUniform <../operation-specs/generation/random-uniform-8>` +* :doc:`Range <../operation-specs/generation/range-4>` +* :doc:`RDFT <../operation-specs/signals/rdft-9>` +* :doc:`ReLU <../operation-specs/activation/relu-1>` +* :doc:`ReadValue <../operation-specs/infrastructure/read-value-3>` +* :doc:`ReduceL1 <../operation-specs/reduction/reduce-l1-4>` +* :doc:`ReduceL2 <../operation-specs/reduction/reduce-l2-4>` +* :doc:`ReduceLogicalAnd <../operation-specs/reduction/reduce-logical-and-1>` +* :doc:`ReduceLogicalOr <../operation-specs/reduction/reduce-logical-or-1>` +* :doc:`ReduceMax <../operation-specs/reduction/reduce-max-1>` +* :doc:`ReduceMean <../operation-specs/reduction/reduce-mean-1>` +* :doc:`ReduceMin <../operation-specs/reduction/reduce-min-1>` +* :doc:`ReduceProd <../operation-specs/reduction/reduce-prod-1>` +* :doc:`ReduceSum <../operation-specs/reduction/reduce-sum-1>` +* :doc:`RegionYolo <../operation-specs/detection/region-yolo-1>` +* :doc:`ReorgYolo 
<../operation-specs/detection/reorg-yolo-1>` +* :doc:`Reshape <../operation-specs/shape/reshape-1>` +* :doc:`Result <../operation-specs/infrastructure/result-1>` +* :doc:`ReverseSequence <../operation-specs/movement/reverse-sequence-1>` +* :doc:`RNNCell <../operation-specs/sequence/rnn-cell-3>` +* :doc:`RNNSequence <../operation-specs/sequence/rnn-sequence-5>` +* :doc:`ROIAlign <../operation-specs/detection/roi-align-9>` +* :doc:`ROIAlignRotated <../operation-specs/detection/roi-align-rotated-15>` +* :doc:`ROIPooling <../operation-specs/detection/roi-pooling-1>` +* :doc:`Roll <../operation-specs/movement/roll-7>` +* :doc:`Round <../operation-specs/arithmetic/round-5>` +* :doc:`ScaledDotProductAttention <../operation-specs/sequence/scaled-dot-product-attention>` +* :doc:`ScatterElementsUpdate <../operation-specs/movement/scatter-elements-update-12>` +* :doc:`ScatterNDUpdate <../operation-specs/movement/scatter-nd-update-15>` +* :doc:`ScatterUpdate <../operation-specs/movement/scatter-update-3>` +* :doc:`SearchSorted <../operation-specs/sort/search-sorted-15>` +* :doc:`Select <../operation-specs/condition/select-1>` +* :doc:`Selu <../operation-specs/activation/selu-1>` +* :doc:`ShapeOf <../operation-specs/shape/shape-of-3>` +* :doc:`ShuffleChannels <../operation-specs/movement/shuffle-channels-1>` +* :doc:`Sigmoid <../operation-specs/activation/sigmoid-1>` +* :doc:`Sign <../operation-specs/arithmetic/sign-1>` +* :doc:`Sin <../operation-specs/arithmetic/sin-1>` +* :doc:`Sinh <../operation-specs/arithmetic/sinh-1>` +* :doc:`Slice <../operation-specs/movement/slice-8>` +* :doc:`SliceScatter <../operation-specs/movement/slice-scatter-15>` +* :doc:`SoftMax <../operation-specs/activation/softmax-8>` +* :doc:`SoftPlus <../operation-specs/activation/softplus-4>` +* :doc:`SoftSign <../operation-specs/activation/softsign-9>` +* :doc:`SpaceToBatch <../operation-specs/movement/space-to-batch-2>` +* :doc:`SpaceToDepth <../operation-specs/movement/space-to-depth-1>` +* :doc:`Split 
<../operation-specs/movement/split-1>` +* :doc:`Sqrt <../operation-specs/arithmetic/sqrt-1>` +* :doc:`SquaredDifference <../operation-specs/arithmetic/squared-difference-1>` +* :doc:`Squeeze <../operation-specs/shape/squeeze-15>` +* :doc:`STFT <../operation-specs/signals/stft-15>` +* :doc:`StridedSlice <../operation-specs/movement/strided-slice-1>` +* :doc:`StringTensorPack <../operation-specs/type/string-tensor-pack-15>` +* :doc:`StringTensorUnpack <../operation-specs/type/string-tensor-unpack-15>` +* :doc:`Subtract <../operation-specs/arithmetic/subtract-1>` +* :doc:`Swish <../operation-specs/activation/swish-4>` +* :doc:`Tan <../operation-specs/arithmetic/tan-1>` +* :doc:`Tanh <../operation-specs/arithmetic/tanh-1>` +* :doc:`TensorIterator <../operation-specs/infrastructure/tensor-iterator-1>` +* :doc:`Tile <../operation-specs/movement/tile-1>` +* :doc:`TopK <../operation-specs/sort/top-k-11>` +* :doc:`Transpose <../operation-specs/movement/transpose-1>` +* :doc:`Unique <../operation-specs/movement/unique-10>` +* :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` +* :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst index 5ea1c3bde54018..41170ad2036a25 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset2.rst @@ -68,7 +68,6 @@ Table of Contents * :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * 
:doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -128,5 +127,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst index fcce876e6944df..9b2992db18cc36 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset3.rst @@ -76,7 +76,6 @@ Table of Contents * :doc:`LogicalXor <../operation-specs/logical/logical-xor-1>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -144,4 +143,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst index 0c0ae150b1d21d..fdada28b6bdc06 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset5.rst @@ -86,7 +86,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN 
<../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -162,5 +162,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst index 8d626c71be2d89..d587370800aa9e 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset6.rst @@ -93,7 +93,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -168,4 +168,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst 
b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst index 7dd5d8fbba14d7..6de9bccf5921b7 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset7.rst @@ -96,7 +96,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-1>` * :doc:`Maximum <../operation-specs/arithmetic/maximum-1>` @@ -172,4 +172,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst index 0bb5b336f9e2f2..f35299c9f4c97a 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset8.rst @@ -101,7 +101,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * 
:doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -183,5 +183,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst index ebb5dfdbdc6555..2421e4b9bc53c1 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset9.rst @@ -105,7 +105,7 @@ Table of Contents * :doc:`Loop <../operation-specs/infrastructure/loop-5>` * :doc:`LRN <../operation-specs/normalization/lrn-1>` * :doc:`LSTMCell <../operation-specs/sequence/lstm-cell-1>` -* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-1>` +* :doc:`LSTMSequence <../operation-specs/sequence/lstm-sequence-5>` * :doc:`MatMul <../operation-specs/matrix/matmul-1>` * :doc:`MatrixNMS <../operation-specs/sort/matrix-non-max-suppression-8>` * :doc:`MaxPool <../operation-specs/pooling/max-pool-8>` @@ -189,4 +189,3 @@ Table of Contents * :doc:`Transpose <../operation-specs/movement/transpose-1>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 7ac47116595621..9f7badf1a8d06a 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -29,8 +29,10 @@ Operation Specifications BatchToSpace-2 BinaryConvolution-1 BitwiseAnd-13 + 
BitwiseLeftShift-15 BitwiseNot-13 BitwiseOr-13 + BitwiseRightShift-15 BitwiseXor-13 Broadcast-1 Broadcast-3 @@ -39,6 +41,7 @@ Operation Specifications CTCGreedyDecoderSeqLen-6 Ceiling-1 Clamp-1 + Col2Im-15 Concat-1 Constant-1 ConvertLike-1 @@ -61,7 +64,9 @@ Operation Specifications Einsum-7 Elu-1 EmbeddingBagOffsetsSum-3 + EmbeddingBagOffsets-15 EmbeddingBagPackedSum-3 + EmbeddingBagPacked-15 EmbeddingSegmentsSum-3 Equal-1 Erf-1 @@ -189,6 +194,7 @@ Operation Specifications RNNSequence-5 ROIAlign-3 ROIAlign-9 + ROIAlignRotated-15 ROIPooling-1 Roll-7 Round-5 @@ -196,6 +202,7 @@ Operation Specifications ScatterElementsUpdate-3 ScatterElementsUpdate-12 ScatterNDUpdate-3 + ScatterNDUpdate-15 ScatterUpdate-3 SearchSorted-15 Select-1 @@ -208,6 +215,7 @@ Operation Specifications Sin-1 Sinh-1 Slice-8 + SliceScatter-15 SoftMax-1 SoftMax-8 SoftPlus-4 @@ -218,7 +226,11 @@ Operation Specifications Sqrt-1 SquaredDifference-1 Squeeze-1 + Squeeze-15 + STFT-15 StridedSlice-1 + StringTensorPack-15 + StringTensorUnpack-15 Subtract-1 Swish-4 Tan-1 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/activation/identity-16.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/activation/identity-16.rst new file mode 100644 index 00000000000000..19a40ecfb86ba9 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/activation/identity-16.rst @@ -0,0 +1,48 @@ +Identity +======== + + +.. meta:: + :description: Learn about Identity-16 - a simple operation that forwards the input to the output. + +**Versioned name**: *Identity-16* + +**Category**: *Activation* + +**Short description**: The *Identity* operation forwards the input to the output. + +**Detailed description**: The *Identity* operation generates a new tensor that mirrors the input tensor in shape, data type, and content, effectively implementing the linear activation function f(x) = x. 
+If the input and output tensor data address is the same, input is returned as output instead. + +**Input**: + +* **1**: `input` - A tensor of any shape and type `T`. **Required.** + +**Output**: + +* **1**: `output` - A tensor with the same shape and type `T` as the input, containing the same data as the input. + +**Types** + +* **T**: any supported data type. + +*Example 1: 2D input matrix.* + +.. code-block:: xml + :force: + + + + + + 3 + 3 + + + + + 3 + 3 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst index 5cc1b024f158b1..f02c5414ac4369 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/infrastructure/loop-5.rst @@ -11,7 +11,7 @@ Loop **Category**: *Infrastructure* **Short description**: *Loop* operation performs recurrent execution of the network, which is described in the ``body``, iterating through the data. -The operation has similar semantic to the ONNX Loop `operation `__. +The operation has similar semantic to the ONNX Loop `operation `__. **Detailed description** @@ -73,7 +73,7 @@ Loop operation description in the IR also has several special sections: ``body`` 1. The body operation getting an input from the main graph should have an entry in the ``port_map`` section of the Loop operation. These edges connect input ports of the Loop with the body ``Parameter``\ s. 2. Input tensors to the Loop can be sliced along a specified axis, the Loop can iterates over all sliced parts. The corresponding ``input`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to slice. 
Therefore, inputs to the Loop operation corresponding to ``input`` entries in the ``port_map`` without ``axis`` attribute are used "as is" (without slicing). 3. The body operation producing tensor to be used in the subsequent iterations (like in RNN models) should have a back edge described in the ``back_edges`` section of the operation. The back edge connects the respective body ``Parameter`` and ``Result`` operations. For such a case the Loop operation node provides input for the first iteration, while corresponding Loop operation output produces the tensor computed during the last iteration. -4. Output tensors produced by a particular body operation across all iterations can be concatenated and returned as a Loop operation output (this is a "scan output" according to the ONNX* Loop operation `specification `__ ). The corresponding ``output`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to concatenate. Therefore, outputs from operations corresponding to ``output`` entries in the ``port_map`` without ``axis`` attribute are returned "as is" (without concatenation). +4. Output tensors produced by a particular body operation across all iterations can be concatenated and returned as a Loop operation output (this is a "scan output" according to the ONNX* Loop operation `specification `__ ). The corresponding ``output`` entry in the ``port_map`` should have ``axis`` attribute specifying the axis to concatenate. Therefore, outputs from operations corresponding to ``output`` entries in the ``port_map`` without ``axis`` attribute are returned "as is" (without concatenation). 5. There is one body ``Parameter`` operation not connected through the ``port_map``. This is a "current iteration" input. The Loop operation is responsible for providing the appropriate value for each iteration. 6. Connection of nodes inside the Loop body with the main graph should be done through ``Parameter`` and ``Result`` body operations. 
No other ways to connect graphs are allowed. diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst index 28dbec46289f89..f58418ee923a8b 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-cell-3.rst @@ -64,7 +64,7 @@ GRUCell * *linear_before_reset* * **Description**: *linear_before_reset* flag denotes if the layer behaves according to the modification - of *GRUCell* described in the formula in the `ONNX documentation `__. + of *GRUCell* described in the formula in the `ONNX documentation `__. * **Range of values**: true or false * **Type**: ``boolean`` * **Default value**: false diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst index 37c70087e121ea..f9b9a5ece850ec 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/gru-sequence-5.rst @@ -19,7 +19,7 @@ represents a sequence of GRU cells. The sequence can be connected differently de ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. 
The most of the attributes are in sync with the specification of ONNX GRU operator defined -`GRUCell `__ +`GRUCell `__ **Attributes** @@ -69,7 +69,7 @@ are in sync with the specification of ONNX GRU operator defined * *linear_before_reset* * **Description**: *linear_before_reset* flag denotes if the layer behaves according to the modification - of *GRUCell* described in the formula in the `ONNX documentation `__. + of *GRUCell* described in the formula in the `ONNX documentation `__. * **Range of values**: True or False * **Type**: ``boolean`` * **Default value**: False diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst similarity index 97% rename from docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-1.rst rename to docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst index dcc21ca5321451..164033bdd2831c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst @@ -3,10 +3,10 @@ LSTMSequence .. meta:: - :description: Learn about LSTMSequence-1 - a sequence processing operation, which + :description: Learn about LSTMSequence-5 - a sequence processing operation, which can be performed on seven required input tensors. -**Versioned name**: *LSTMSequence-1* +**Versioned name**: *LSTMSequence-5* **Category**: *Sequence processing* @@ -14,7 +14,7 @@ LSTMSequence **Detailed description** -A single cell in the sequence is implemented in the same way as in :doc:`LSTM Cell ` operation. *LSTMSequence* represents a sequence of LSTM cells. 
The sequence can be connected differently depending on ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX LSTM operator defined `LSTMCell `__ . +A single cell in the sequence is implemented in the same way as in :doc:`LSTM Cell ` operation. *LSTMSequence* represents a sequence of LSTM cells. The sequence can be connected differently depending on ``direction`` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX LSTM operator defined `LSTMCell `__ . **Attributes** @@ -145,4 +145,3 @@ A single cell in the sequence is implemented in the same way as in :doc:`LSTM Ce - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst index fc9829dd999bda..a3dfc062de2dcd 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/rnn-sequence-5.rst @@ -14,7 +14,7 @@ RNNSequence **Detailed description** -A single cell in the sequence is implemented in the same way as in :doc:`RNNCell ` operation. *RNNSequence* represents a sequence of RNN cells. The sequence can be connected differently depending on `direction` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX RNN operator defined `RNNCell `__. 
+A single cell in the sequence is implemented in the same way as in :doc:`RNNCell ` operation. *RNNSequence* represents a sequence of RNN cells. The sequence can be connected differently depending on `direction` attribute that specifies the direction of traversing of input data along sequence dimension or specifies whether it should be a bidirectional sequence. The most of the attributes are in sync with the specification of ONNX RNN operator defined `RNNCell `__. **Attributes** diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-15.rst new file mode 100644 index 00000000000000..1e112dce118e26 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/shape/squeeze-15.rst @@ -0,0 +1,174 @@ +Squeeze +======= + + +.. meta:: + :description: Learn about Squeeze-15 - a shape manipulation operation, which + can be performed on one required and one optional input tensor. + +**Versioned name**: *Squeeze-15* + +**Category**: *Shape manipulation* + +**Short description**: *Squeeze* removes dimensions equal to 1 from the first input tensor. + +**Detailed description**: *Squeeze* can be used with or without the second input tensor. + +* If only the first input is provided, every dimension that is equal to 1 will be removed from it. +* With the second input provided, each value is an index of a dimension from the first tensor that is to be removed. Specified dimension should be equal to 1, otherwise it will be ignored and copied as is. + Dimension indices can be specified directly, or by negative indices (counting dimensions from the end). + +.. note:: + + - If index of the dimension to squeeze is provided as a constant input and it points to a dynamic dimension that might be `1`, and the *allow_axis_skip* attribute is ``false``, then the dimension is considered as squeezable. 
Therefore the rank of the output shape will be reduced, but not dynamic. If dynamic rank is expected for such a case, the *allow_axis_skip* attribute needs to be set to ``true``. + - If the input with indices is empty or not provided, dynamic dimension compatible with `1` leads to dynamic rank of the output shape. + + +**Attributes**: + +* *allow_axis_skip* + + * **Description**: If true, shape inference results in a dynamic rank if selected axis has value 1 in its dimension range. + * **Range of values**: ``false`` or ``true`` + * **Type**: ``boolean`` + * **Required**: *no* + * **Default value**: ``false`` + +**Inputs**: + +* **1**: Multidimensional input tensor of type *T*. **Required.** + +* **2**: Scalar or 1D tensor of type *T_INT* with indices of dimensions to squeeze. Values could be negative (have to be from range ``[-R, R-1]``, where ``R`` is the rank of the first input). **Optional.** + +**Outputs**: + +* **1**: Tensor with squeezed values of type *T*. + +**Types** + +* *T*: any numeric type. + +* *T_INT*: any supported integer type. + +**Example** + +*Example 1: squeeze 4D tensor to a 2D tensor* + +.. code-block:: xml + :force: + + + + + + 1 + 3 + 1 + 2 + + + + + 2 + + + + + 3 + 2 + + + + +*Example 2: squeeze 1D tensor with 1 element to a 0D tensor (constant)* + +.. code-block:: xml + :force: + + + + + + 1 + + + + + 1 + + + + + + + + +*Example 3: squeeze 1D tensor with 1 dynamic shape element to a fully dynamic shape* + +.. code-block:: xml + :force: + + + + + + -1 + + + + + 1 + + + + + + + +*Example 4: squeeze 2D tensor with dynamic and static shape elements to a static shape output, according to the opset1 rules* + +.. code-block:: xml + :force: + + + + + + 2 + -1 + + + + + 1 + + + + + 2 + + + + +*Example 5: squeeze 2D tensor with dynamic and static shape elements to a dynamic shape output, according to the opset15 rules* + +.. 
code-block:: xml + :force: + + + + + + 2 + -1 + + + + + 1 + + + + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst index 4a41df7214317c..bcc420f5db25c9 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/signals/stft-15.rst @@ -14,14 +14,14 @@ Short Time Fourier Transformation for real-valued input (STFT) **Short description**: *STFT* operation performs Short-Time Fourier Transform (real-to-complex). -**Detailed description**: *STFT* performs Short-Time Fourier Transform of real-valued batched input tensor of shape ``[batch, signal_size]``, and produces complex result represented by separate values for real and imaginary part. +**Detailed description**: *STFT* performs Short-Time Fourier Transform of real-valued input tensor of shape ``[signal_size]`` or ``[batch, signal_size]``, and produces complex result represented by separate values for real and imaginary part. **Attributes**: -* *transform_frames* +* *transpose_frames* - * **Description**: Flag to set output shape layout. If true the ``frames`` dimension is at out_shape[2], otherwise it is at out_shape[1]. + * **Description**: Flag to set output shape layout. If true the ``frames`` dimension is at out_shape[-2], otherwise it is at out_shape[-3]. * **Range of values**: * ``false`` - do not transpose output shape @@ -31,25 +31,25 @@ Short Time Fourier Transformation for real-valued input (STFT) **Inputs** -* **1**: ``signal`` - Tensor of type *T* and 2D shape [batch, signal_size] with signal data for the STFT. **Required.** -* **2**: ``window`` - Tensor of type *T* and 1D shape [window_length], specifying the window values for the signal slice multiplication. 
**Required.** -* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.** -* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing The distance (number of samples) between successive frames. **Required.** +* **1**: ``signal`` - Tensor of type *T* and 1D shape [signal_size] or 2D shape [batch, signal_size] with signal data for the STFT. **Required.** +* **2**: ``window`` - Tensor of type *T* and 1D shape [window_length], specifying the window values for the signal slice multiplication. **Required.** +* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.** +* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing the distance (number of samples) between successive frames. **Required.** **Outputs** -* **1**: The result of STFT operation, tensor of the same type as input ``signal`` tensor and shape: +* **1**: The result of STFT operation, tensor of the same type as input ``signal`` tensor and shape: - + When ``transform_frames == false`` the output shape is ``[batch, frames, fft_results, 2]`` - + When ``transform_frames == true`` the output shape is ``[batch, fft_results, frames, 2]`` + * When ``transpose_frames == false`` the output shape is ``[frames, fft_results, 2]`` for 1D signal input or ``[batch, frames, fft_results, 2]`` for 2D signal input. + * When ``transpose_frames == true`` the output shape is ``[fft_results, frames, 2]`` for 1D signal input or ``[batch, fft_results, frames, 2]`` for 2D signal input.
- where: + where: - + ``batch`` is a batch size dimension - + ``frames`` is a number calculated as ``(signal_shape[1] - frame_size) / frame_step) + 1`` - + ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` - + ``2`` is the last dimension is for complex value real and imaginary part + * ``batch`` is a batch size dimension + * ``frames`` is a number calculated as ``((signal_shape[-1] - frame_size) / frame_step) + 1`` + * ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` + * ``2`` is the last dimension, which holds the real and imaginary parts of the complex value **Types** @@ -59,27 +59,109 @@ Short Time Fourier Transformation for real-valued input (STFT) * *T_INT*: ``int64`` or ``int32``. -**Example**: +**Examples**: + +*Example 1D signal, transpose_frames=false:* .. code-block:: xml :force: + + 56 + + + 7 + + + + + + 16 + 6 2 - 48 + + + + + +*Example 1D signal, transpose_frames=true:* + +.. code-block:: xml + :force: + + + + + + 56 - 8 + 7 - - + + - + + 6 + 16 2 - 9 - 9 + + + + +*Example 2D signal, transpose_frames=false:* + +.. code-block:: xml + :force: + + + + + + 3 + 56 + + + 7 + + + + + + 3 + 16 + 6 + 2 + + + + + +*Example 2D signal, transpose_frames=true:* + +.. code-block:: xml + :force: + + + + + + 3 + 56 + + + 7 + + + + + + 3 + 6 + 16 2 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst index 81c592d3341a35..7a623a1e16739c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst @@ -17,29 +17,32 @@ SearchSorted **Attributes** -* *right* +* *right_mode* - * **Description**: If False, set the first suitable index. If True, return the last suitable index for given value. Default is False.
- * **Range of values**: true or false - * **Type**: boolean + * **Description**: flag to control whether output would contain leftmost or rightmost indices for given values. + * **Range of values**: + + * *true* - return the rightmost (last) suitable index for given value. + * *false* - return the leftmost (first) suitable index for given value. + * **Type**: ``boolean`` * **Default value**: false * **Required**: *no* **Inputs**: -* **1**: ``sorted`` - ND input tensor of type *T* - cannot be a scalar, containing monotonically increasing sequence on the innermost dimension. **Required.** +* **1**: ``sorted_sequence`` - ND input tensor of type *T* - cannot be a scalar, containing monotonically increasing sequence on the innermost dimension. **Required.** * **2**: ``values`` - ND input tensor of type *T*, containing the search values. If sorted sequence is 1D, then the values can have any shape, otherwise the rank should be equal to the rank of sorted input. **Required.** **Outputs**: -* **1**: Tensor of type *TOut*, with the same shape as second input tensor, containing the indices. +* **1**: Tensor of type *T_IND*, with the same shape as second input tensor ``values``, containing the indices. **Types** * *T*: any supported floating-point and integer type. -* *TOut*: int64. +* *T_IND*: ``int64``. **Example** @@ -47,7 +50,7 @@ SearchSorted :force: - + 7 @@ -63,7 +66,7 @@ SearchSorted - + 7 256 200 diff --git a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst index dc43881780b1e6..e10a67fddadb53 100644 --- a/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst +++ b/docs/articles_en/get-started/configurations/configurations-intel-gpu.rst @@ -37,7 +37,7 @@ Below are the instructions on how to install the OpenCL packages on supported Li and install the apt package `ocl-icd-libopencl1` with the OpenCl ICD loader. 
Alternatively, you can add the apt repository by following the - `installation guide `__. + `installation guide `__. Then install the `ocl-icd-libopencl1`, `intel-opencl-icd`, `intel-level-zero-gpu` and `level-zero` apt packages: diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 6dda915af93b69..7f26ab9ec72c9f 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -30,26 +30,6 @@ All currently supported versions are: * 2023.3 (LTS) * 2022.3 (LTS) -.. dropdown:: Distributions and Device Support - - Different OpenVINO distributions may support slightly different sets of features. - Read installation guides for particular distributions for more details. - Refer to the :doc:`OpenVINO Release Policy <../../../about-openvino/release-notes-openvino/release-policy>` - to learn more about the release types. - - .. dropdown:: Distribution Comparison for OpenVINO 2024.4 - - =============== ========== ====== =============== ======== ============ ========== ========== ========== - Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm - =============== ========== ====== =============== ======== ============ ========== ========== ========== - CPU V V V V V V V V - GPU V V V V V V V V - NPU V\* V\* V\ * n/a n/a n/a n/a V\* - =============== ========== ====== =============== ======== ============ ========== ========== ========== - - | \* **Of the Linux systems, versions 22.04 and 24.04 include drivers for NPU.** - | **For Windows, CPU inference on ARM64 is not supported.** - .. 
dropdown:: Effortless GenAI integration with OpenVINO GenAI Flavor A new OpenVINO GenAI Flavor streamlines application development by providing diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst index 511ecc627d45ad..d03fa8fe8b0dc5 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst @@ -14,28 +14,8 @@ Install Intel® Distribution of OpenVINO™ Toolkit for Linux Using APT Reposito * is dedicated to Linux users only * additionally includes code samples - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software Requirements - :sync: software-requirements - - * `CMake 3.13 or higher, 64-bit `__ - * GCC 7.5.0 (for Ubuntu 18.04), GCC 9.3.0 (for Ubuntu 20.04) or GCC 11.3.0 (for Ubuntu 22.04) - * `Python 3.9 - 3.12, 64-bit `__ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime ####################################### @@ -57,7 +37,7 @@ Step 1: Set Up the OpenVINO Toolkit APT Repository .. code-block:: sh - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo gpg --output /etc/apt/trusted.gpg.d/intel.gpg --dearmor GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB .. 
note:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst index 5b40422115179c..e777c06253a37a 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst @@ -30,58 +30,8 @@ Install OpenVINO™ Runtime on Linux from an Archive File RHEL8 x86_64 V V n/a =================== ===== ===== ===== -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `CMake 3.13 or higher, 64-bit `__ - * `Python 3.9 - 3.12, 64-bit `__ - * GCC: - - .. tab-set:: - - .. tab-item:: Ubuntu 20.04 - :sync: ubuntu-20 - - * GCC 9.3.0 - - .. tab-item:: RHEL 8 - :sync: rhel-8 - - * GCC 8.4.1 - - .. tab-item:: CentOS 7 - :sync: centos-7 - - * GCC 8.3.1 - Use the following instructions to install it: - - Install GCC 8.3.1 via devtoolset-8 - - .. code-block:: sh - - sudo yum update -y && sudo yum install -y centos-release-scl epel-release - sudo yum install -y devtoolset-8 - - Enable devtoolset-8 and check current gcc version - - .. code-block:: sh - - source /opt/rh/devtoolset-8/enable - gcc -v + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst index 20f5df9b30a9d1..e9157a99e1c882 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst @@ -16,22 +16,8 @@ Install OpenVINO™ Runtime on macOS from an Archive File * is dedicated to macOS users (archives for other systems are also available) * is only supported for CPU Plugin - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Software Requirements - :sync: software-requirements - - * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). - * `Python 3.9 - 3.12 `__ (choose 3.9 - 3.12). Install and add to path. - * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory - * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst index bcc03f37d74295..8f3efeeb720dc9 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst @@ -15,43 +15,8 @@ Install OpenVINO™ Runtime on Windows from an Archive File * additionally includes code samples * is dedicated to Windows users (archives for other systems are also available) - -System Requirements -#################### - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `Microsoft Visual Studio 2019 with MSBuild `__ or `Microsoft Visual Studio 2022 `__ - * `CMake 3.14 or higher, 64-bit `__ (optional, only required for building sample applications) - * `Python 3.9 - 3.12, 64-bit `__ - - .. note:: - - To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `__. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. - - .. note:: - - You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. - - .. important:: - - When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `__ to your `PATH` environment variable. 
- + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst b/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst index f111c0f18e3fec..b1710f3bb358e8 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-brew.rst @@ -14,43 +14,8 @@ Install OpenVINO™ Runtime via Homebrew * does not offer support for NPU inference * is dedicated to macOS (both arm64 and x86_64) and Linux (x86_64 only) users. - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software Requirements - :sync: software-requirements - - .. tab-set:: - - .. tab-item:: Linux - :sync: linux - - * `Homebrew `_ - * `CMake 3.13 or higher, 64-bit `__ - * GCC 7.5.0 (for Ubuntu 18.04), GCC 9.3.0 (for Ubuntu 20.04) or GCC 11.3.0 (for Ubuntu 22.04) - * `Python 3.9 - 3.12, 64-bit `__ - - .. tab-item:: macOS - :sync: macos - - * `Homebrew `_ - * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). - * `Python 3.9 - 3.12 `__ . Install and add it to path. - * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it. 
- * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst index 4cf32c7fc474c2..06557003b3cbf6 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conan.rst @@ -15,26 +15,8 @@ Install OpenVINO™ Runtime from Conan Package Manager * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - | There are many ways to work with Conan Package Manager. Before you proceed, learn more about it on the - | `Conan distribution page `__ + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime with Conan Package Manager ############################################################ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index e551971fd98f92..d1392d3f46a513 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -16,28 +16,8 @@ Install OpenVINO™ Runtime from Conda Forge * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - - .. tab-item:: Software - :sync: software - - | There are many ways to work with Conda. Before you proceed, learn more about it on the - | `Anaconda distribution page `__ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime with Anaconda Package Manager ############################################################ diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst index 38795bd529c082..de7b2ed3ff622e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-docker-linux.rst @@ -6,12 +6,7 @@ Install Intel® Distribution of OpenVINO™ Toolkit From a Docker Image manually to install OpenVINO™ Runtime on Linux and Windows operating systems. This guide presents information on how to use a pre-built Docker image or create a new image -manually, to install OpenVINO™ Runtime. The supported host operating systems for the Docker -base image are: - -- Linux -- Windows (WSL2) -- macOS (CPU exectuion only) +manually, to install OpenVINO™ Runtime. You can get started easily with pre-built and published docker images, which are available at: @@ -34,11 +29,25 @@ The Docker CI repository includes guides on how to `get started with docker images `__ and how to use `OpenVINO™ Toolkit containers with GPU accelerators. `__ -To start using Dockerfiles the following conditions must be met: +To start using Dockerfiles, install Docker Engine or a compatible container +engine on your system: + +.. tab-set:: + + .. tab-item:: Linux + :sync: linux + + * `Docker Desktop `__ + * `Docker Engine `__ + + .. tab-item:: Windows (WSL2) + :sync: win + + OpenVINO can be installed under :ref:`Windows Subsystem for Linux (WSL2) `. + + * `Docker Desktop `__ -- Linux OS or Windows (under :ref:`Windows Subsystem for Linux (WSL2) `) -- Installed docker engine or compatible container engine -- Permissions to run containers (sudo or docker group membership) +Also, verify you have permissions to run containers (sudo or docker group membership). .. 
note:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst b/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst index 7d811c69e5991c..5060ccfc654229 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-npm.rst @@ -15,19 +15,8 @@ Install Intel® Distribution of OpenVINO™ Toolkit from npm Registry (all x86_64 / arm64 architectures) * macOS offers support only for CPU inference -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - - Windows, Linux, macOS - - x86, ARM (Windows ARM not supported) - - .. tab-item:: Software Requirements - :sync: software-requirements - - `Node.js version 21.0.0 and higher `__ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Node.js ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst index c079f167761ada..cd3fd41fed03e0 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-pip.rst @@ -16,26 +16,8 @@ Install Intel® Distribution of OpenVINO™ Toolkit from PyPI Repository (all x86_64 / arm64 architectures) * macOS offers support only for CPU inference - -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - | `PyPI OpenVINO page `__ - - - .. 
tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU - inference, refer to: - | `Product Specifications `__ - - - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Installing OpenVINO Runtime ########################### @@ -137,7 +119,7 @@ to see if your case needs any of them. .. code-block:: python - from openvino import get_cmake_path + from openvino.utils import get_cmake_path cmake_path = get_cmake_path() For detailed instructions on how to use these configurations in your build setup, check out the diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst b/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst index 747d4d873057f6..af9fe85528ca5d 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-vcpkg.rst @@ -15,26 +15,8 @@ Install OpenVINO™ Runtime via vcpkg * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software Requirements - :sync: software-requirements - - * `vcpkg `__ - - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Installing OpenVINO Runtime ########################### diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst index 0ff1b95c8eb212..475f623ef86598 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yocto.rst @@ -108,6 +108,6 @@ Additional Resources - `Official Yocto Project documentation `__ - `BitBake Tool `__ - `Poky `__ -- `Meta-intel `__ +- `Meta-intel `__ - `Meta-openembedded `__ - `Meta-clang `__ \ No newline at end of file diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst index 6775495fcd2a30..970bb47a095d5b 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst @@ -15,41 +15,8 @@ Install OpenVINO™ Runtime on Linux From YUM Repository * is dedicated to Linux users only * additionally includes code samples -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. note:: - - OpenVINO RPM packages are compatible with and can be run on the following operating systems: - - - RHEL 8.2 and higher - - Amazon Linux 2022 and 2023 - - Rocky Linux 8.7, 8.8 and 9.2-9.3 - - Alma Linux 8.7, 8.8 and 9.2-9.4 - - Oracle Linux 8.7, 8.8 and 9.2-9.4 - - Fedora 29 and higher up to 41 - - OpenEuler 20.03, 22.03, 23.03 and 24.03 - - Anolis OS 8.6 and 8.8 - - CentOS Stream 8 and 9 - - .. 
tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `CMake 3.13 or higher, 64-bit `_ - * GCC 8.2.0 - * `Python 3.9 - 3.12, 64-bit `_ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. Install OpenVINO Runtime ######################## diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst index 8a1ceff7271187..127b26cac0590f 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst @@ -15,33 +15,8 @@ Install OpenVINO™ Runtime on Linux From ZYPPER Repository * is dedicated to Linux users only * additionally includes code samples -.. tab-set:: - - .. tab-item:: System Requirements - :sync: system-requirements - - | Full requirement listing is available in: - | :doc:`System Requirements Page <../../../about-openvino/release-notes-openvino/system-requirements>` - - .. note:: - - OpenVINO RPM packages are compatible with and can be run on the following operating systems: - - - openSUSE Tumbleweed - - .. tab-item:: Processor Notes - :sync: processor-notes - - | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: - | `Product Specifications `__ - - .. tab-item:: Software - :sync: software - - * `CMake 3.13 or higher, 64-bit `_ - * GCC 8.2.0 - * `Python 3.9 - 3.12, 64-bit `_ - + Before installing OpenVINO, see the + :doc:`System Requirements page <../../../about-openvino/release-notes-openvino/system-requirements>`. 
Install OpenVINO Runtime ######################## diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index f1fd002b48072e..ebd4667d544616 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -9,6 +9,7 @@ Run LLM Inference on OpenVINO with the GenAI Flavor :hidden: NPU inference of LLMs + genai-guide/genai-use-cases This guide will show you how to integrate the OpenVINO GenAI flavor into your application, covering @@ -174,59 +175,6 @@ You can also create your custom streamer for more sophisticated processing: pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100)); } -Using GenAI in Chat Scenario -################################ - -For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs -may prove beneficial. The chat-specific methods **start_chat** and **finish_chat** are used to -mark a conversation session, as you can see in these simple examples: - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: python - - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path) - - pipe.set_generation_config({'max_new_tokens': 100) - - pipe.start_chat() - while True: - print('question:') - prompt = input() - if prompt == 'Stop!': - break - print(pipe.generate(prompt)) - pipe.finish_chat() - - - .. tab-item:: C++ - :sync: cpp - - .. 
code-block:: cpp - - int main(int argc, char* argv[]) { - std::string prompt; - - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); - - ov::genai::GenerationConfig config = pipe.get_generation_config(); - config.max_new_tokens = 100; - pipe.set_generation_config(config) - - pipe.start_chat(); - for (size_t i = 0; i < questions.size(); i++) { - std::cout << "question:\n"; - std::getline(std::cin, prompt); - - std::cout << pipe.generate(prompt) << std::endl; - } - pipe.finish_chat(); - } Optimizing Generation with Grouped Beam Search ####################################################### diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst new file mode 100644 index 00000000000000..953784c03fdef0 --- /dev/null +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -0,0 +1,433 @@ +GenAI Use Cases +===================== + +This article provides several use case scenarios for Generative AI model +inference. The applications presented in the code samples below +only require minimal configuration, like setting an inference device. Feel free +to explore and modify the source code as you need. + + +Using GenAI for Text-to-Image Generation +######################################## + +Examples below demonstrate inference on text-to-image models, like Stable Diffusion +1.5, 2.1, and LCM, with a text prompt as input. The :ref:`main.cpp ` +sample shows basic usage of the ``Text2ImagePipeline`` pipeline. +:ref:`lora.cpp ` shows how to apply LoRA adapters to the pipeline. + + +.. tab-set:: + + .. tab-item:: Python + :sync: python + + .. tab-set:: + + .. tab-item:: main.py + :name: mainpy + + .. 
code-block:: python + + import openvino_genai + from PIL import Image + import numpy as np + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def infer(model_dir: str, prompt: str): + device = 'CPU' # GPU can be used as well + random_generator = Generator(42) + pipe = openvino_genai.Text2ImagePipeline(model_dir, device) + image_tensor = pipe.generate( + prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + random_generator=random_generator + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + .. tab-item:: LoRA.py + :name: lorapy + + .. code-block:: python + + import openvino as ov + import openvino_genai + import numpy as np + import sys + + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def image_write(path: str, image_tensor: ov.Tensor): + from PIL import Image + image = Image.fromarray(image_tensor.data[0]) + image.save(path) + + + def infer(models_path: str, prompt: str): + prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" + + device = "CPU" # GPU, NPU can be used as well + adapter_config = openvino_genai.AdapterConfig() + + for i in range(int(len(adapters) / 2)): + adapter = openvino_genai.Adapter(adapters[2 * i]) + alpha = float(adapters[2 * i + 1]) + adapter_config.add(adapter, alpha) + + pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) + print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") + image = pipe.generate(prompt, + 
random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20) + + image_write("lora.bmp", image) + print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") + image = pipe.generate(prompt, + adapters=openvino_genai.AdapterConfig(), + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20 + ) + image_write("baseline.bmp", image) + + For more information, refer to the + `Python sample `__ + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: + + .. tab-item:: main.cpp + :name: maincpp + + .. code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1)); + + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + .. tab-item:: LoRA.cpp + :name: loracpp + + .. 
code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::AdapterConfig adapter_config; + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + adapter_config.add(adapter, alpha); + } + + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + + + + + +Using GenAI in Speech Recognition +################################# + + +The application, shown in code samples below, performs inference on speech +recognition Whisper Models. 
The samples include the ``WhisperPipeline`` class +and use audio files in WAV format at a sampling rate of 16 kHz as input. + +.. tab-set:: + + .. tab-item:: Python + :sync: cpp + + .. code-block:: python + + import openvino_genai + import librosa + + + def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + + def infer(model_dir: str, wav_file_path: str): + raw_speech = read_wav(wav_file_path) + pipe = openvino_genai.WhisperPipeline(model_dir) + + def streamer(word: str) -> bool: + print(word, end="") + return False + + result = pipe.generate( + raw_speech, + max_new_tokens=100, + language="<|en|>", + task="transcribe", + return_timestamps=True, + streamer=streamer, + ) + + print() + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include "audio_utils.hpp" + #include "openvino/genai/whisper_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (3 > argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + } + + std::filesystem::path models_path = argv[1]; + std::string wav_file_path = argv[2]; + std::string device = "CPU"; // GPU can be used as well + + ov::genai::WhisperPipeline pipeline(models_path, device); + + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + + ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); + config.max_new_tokens = 100; + config.language = "<|en|>"; + config.task = "transcribe"; + config.return_timestamps = true; + + auto streamer = [](std::string word) { + std::cout << word; + return false; + }; + + auto result = pipeline.generate(raw_speech, config, streamer); + + std::cout << "\n"; + + for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] 
text: " << chunk.text << "\n"; + } + + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__. + + +Using GenAI in Chat Scenario +############################ + +For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs +may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific methods are used to +mark a conversation session, as shown in the samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + + + def streamer(subword): + print(subword, end='', flush=True) + return False + + + def infer(model_dir: str): + device = 'CPU' # GPU can be used as well. + pipe = openvino_genai.LLMPipeline(model_dir, device) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + pipe.generate(prompt, config, streamer) + print('\n----------') + pipe.finish_chat() + + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. 
code-block:: cpp + + #include "openvino/genai/llm_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (2 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string models_path = argv[1]; + + std::string device = "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::function streamer = [](std::string word) { + std::cout << word << std::flush; + return false; + }; + + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, config, streamer); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + +Additional Resources +##################### + +* :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` +* `OpenVINO GenAI Repo `__ +* `OpenVINO GenAI Samples `__ +* `OpenVINO Tokenizers `__ diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst index 77cd0aca62021d..a26b670b5314d0 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst @@ -301,6 +301,19 @@ model to avoid extra computation. This is how it can be done for LLMs: Now the model can be converted to OpenVINO using Optimum Intel Python API or CLI interfaces mentioned above. 
+Execution on CPU device +########################## + +As mentioned in the :ref:`Composability of different threading runtimes ` section, OpenVINO's default threading runtime, +oneTBB, keeps CPU cores active for a while after inference is done. When using Optimum Intel Python API, +it calls Torch (via HF transformers) for postprocessing, such as beam search or greedy search. +Torch uses OpenMP for threading, and OpenMP needs to wait for CPU cores that are kept active by +oneTBB. By default, OpenMP uses `busy-wait `__, which can delay the next OpenVINO inference as well. + +It is recommended to: + +* Limit the number of CPU threads used by Torch with `torch.set_num_threads `__. +* Set the environment variable `OMP_WAIT_POLICY `__ to `PASSIVE`, which disables OpenMP `busy-wait `__. Additional Resources ##################### diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst index 7f220111f64b98..2476a0423e30e1 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst @@ -31,8 +31,8 @@ some examples of popular Generative AI scenarios: To write such pipelines, you can follow the examples provided as part of OpenVINO: -* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ -* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ +* `OpenVINO Latent Consistency Model C++ image generation pipeline `__ +* `OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline `__ To perform inference, models must be first converted to OpenVINO IR format using Hugging Face Optimum-Intel API. 
diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst b/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst index d6e23b3791d001..2064aa843a93d8 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/ov-tokenizers.rst @@ -336,7 +336,7 @@ Additional Resources * `OpenVINO Tokenizers repo `__ * `OpenVINO Tokenizers Notebook `__ -* `Text generation C++ samples that support most popular models like LLaMA 2 `__ +* `Text generation C++ samples that support most popular models like LLaMA 3 `__ * `OpenVINO GenAI Repo `__ diff --git a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst index 19c4a013c54aae..390fe00605f2c6 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst @@ -245,6 +245,13 @@ There are several options for setting the number of inference iterations: The more iterations a model runs, the better the statistics will be for determining average latency and throughput. +Maximum inference rate +++++++++++++++++++++++ + +By default, the benchmarking app will run inference at maximum rate based on device capabilities. +The maximum inference rate can be configured with the ``-max_irate <MAXIMUM_INFERENCE_RATE>`` option. +Tweaking this value allows better accuracy in power usage measurement by limiting the number of executions. 
+ Inputs ++++++++++++++++++++ @@ -337,7 +344,7 @@ following usage message: [Step 1/11] Parsing and validating input arguments [ INFO ] Parsing input parameters usage: benchmark_app.py [-h [HELP]] [-i PATHS_TO_INPUT [PATHS_TO_INPUT ...]] -m PATH_TO_MODEL [-d TARGET_DEVICE] - [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE] + [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-max_irate MAXIMUM_INFERENCE_RATE] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE] [-data_shape DATA_SHAPE] [-layout LAYOUT] [-extensions EXTENSIONS] [-c PATH_TO_CLDNN_CONFIG] [-cdir CACHE_DIR] [-lfile [LOAD_FROM_FILE]] [-api {sync,async}] [-nireq NUMBER_INFER_REQUESTS] [-nstreams NUMBER_STREAMS] [-inference_only [INFERENCE_ONLY]] [-infer_precision INFER_PRECISION] [-ip {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] @@ -536,6 +543,9 @@ following usage message: 'none': no device performance mode will be set. Using explicit 'nstreams' or other device-specific options, please set hint to 'none' -niter Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device. + -max_irate Optional. Maximum inference rate by frame per second. + If not specified, default value is 0, the inference will run at maximium rate depending on a device capabilities. + Tweaking this value allow better accuracy in power usage measurement by limiting the execution. -t Optional. Time in seconds to execute topology. 
Input shapes diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst index 5033d24ba3785a..2a551d7aa44eb5 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst @@ -76,7 +76,7 @@ of optimization methods (`"compression"` section). :fragment: [nncf_congig] Here is a brief description of the required parameters of the Filter Pruning method. For a full description refer to the -`GitHub `__ page. +`GitHub `__ page. * ``pruning_init`` - initial pruning rate target. For example, value ``0.1`` means that at the begging of training, convolutions that can be pruned will have 10% of their filters set to zero. diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index 6348ca897c5ea5..bbc09ccd4b5fbb 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -161,15 +161,16 @@ trade-offs after optimization: `Larger Group Size`: Results in faster inference and a smaller model, but might compromise accuracy. -* ``ratio`` controls the ratio between INT4 and INT8_ASYM compressed layers in the model. +* ``ratio`` controls the ratio between the layers compressed to the precision defined + by ``mode`` and the rest of the layers that will be kept in the ``backup_mode`` in the optimized model. Ratio is a decimal between 0 and 1. For example, 0.8 means that 80% of layers will be - compressed to INT4, while the rest will be compressed to INT8_ASYM precision. 
The default - value for ratio is 1. + compressed to the precision defined by ``mode``, while the rest will be compressed to + ``backup_mode`` precision. The default value for ratio is 1. - `Higher Ratio (more INT4)`: Reduces the model size and increase inference speed but + `Higher Ratio (more layers set to mode precision)`: Reduces the model size and increase inference speed but might lead to higher accuracy degradation. - `Lower Ratio (more INT8_ASYM)`: Maintains better accuracy but results in a larger model size + `Lower Ratio (more layers set to backup_mode precision)`: Maintains better accuracy but results in a larger model size and potentially slower inference. In this example, 90% of the model's layers are quantized to INT4 asymmetrically with @@ -196,8 +197,11 @@ trade-offs after optimization: 4 bits. The method can sometimes result in reduced accuracy when used with Dynamic Quantization of activations. Requires dataset. +* ``gptq`` - boolean parameter that enables the GPTQ method for more accurate INT4 weight + quantization. Requires dataset. + * ``dataset`` - calibration dataset for data-aware weight compression. It is required - for some compression options, for example, ``scale_estimation`` or ``awq``. Some types + for some compression options, for example, ``scale_estimation``, ``gptq`` or ``awq``. Some types of ``sensitivity_metric`` can use data for precision selection. * ``sensitivity_metric`` - controls the metric to estimate the sensitivity of compressing @@ -226,75 +230,108 @@ trade-offs after optimization: * ``all_layers`` - boolean parameter that enables INT4 weight quantization of all Fully-Connected and Embedding layers, including the first and last layers in the model. -For data-aware weight compression refer to the following -`example `__. +* ``lora_correction`` - boolean parameter that enables the LoRA Correction Algorithm + to further improve the accuracy of INT4 compressed models on top of other + algorithms - AWQ and Scale Estimation. 
-.. note:: +* ``backup_mode`` - defines a backup precision for mixed-precision weight compression. + There are three modes: INT8_ASYM, INT8_SYM, and NONE, which retains + the original floating-point precision of the model weights (``INT8_ASYM`` is default value). - Some methods can be stacked on top of one another to achieve a better - accuracy-performance trade-off after weight quantization. For example, the Scale Estimation - method can be applied along with AWQ and mixed-precision quantization (the ``ratio`` parameter). -The example below shows data-free 4-bit weight quantization -applied on top of OpenVINO IR. Before trying the example, make sure Optimum Intel -is installed in your environment by running the following command: +**Use synthetic data for LLM weight compression** -.. code-block:: python - - pip install optimum[openvino] +It is possible to generate a synthetic dataset using the `nncf.data.generate_text_data` method for +data-aware weight compression. The method takes a language model (e.g. from `optimum.intel.openvino`) +and a tokenizer (e.g. from `transformers`) as input and returns the list of strings generated by the model. +Note that dataset generation takes time and depends on various conditions, like the model size, +requested dataset length or environment setup. Also, since the dataset is generated by the model output, +it does not guarantee significant accuracy improvement after compression. This method is recommended +only when a better dataset is not available. Refer to the +`example `__ +for details of the usage. -The first example loads a pre-trained Hugging Face model using the Optimum Intel API, -compresses it to INT4 using NNCF, and then executes inference with a text phrase. +.. code-block:: python -If the model comes from `Hugging Face `__ and is supported -by Optimum, it may be easier to use the Optimum Intel API to perform weight compression. 
-The compression type is specified when the model is loaded using the ``load_in_8bit=True`` -or ``load_in_4bit=True`` parameter. The second example uses the Weight Compression API -from Optimum Intel instead of NNCF to compress the model to INT8_ASYM. + from nncf import Dataset + from nncf.data import generate_text_data + from functools import partial + + from transformers import AutoTokenizer, AutoModelForCausalLM + + # Example: Generating synthetic dataset + tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) + hf_model = AutoModelForCausalLM.from_pretrained( + MODEL_ID, export=True, load_in_8bit=False + ) + + # Synthetic-based compression + synthetic_dataset = nncf.data.generate_text_data(hf_model, tokenizer, dataset_size=100) + quantization_dataset = nncf.Dataset( + synthetic_dataset, + transform_fn # see example in NNCF repo how to make transform_fn + ) + + model = compress_weights( + model, + mode=CompressWeightsMode.INT4_ASYM, + group_size=64, + ratio=1.0, + dataset=quantization_dataset, + awq=True, + scale_estimation=True + ) # model is openvino.Model -.. tab-set:: +For data-aware weight compression refer to the following +`example `__. - .. tab-item:: OpenVINO - :sync: openvino +.. note:: - .. code-block:: python + Some methods can be stacked on top of one another to achieve a better + accuracy-performance trade-off after weight quantization. For example, the **Scale Estimation** + method can be applied along with **AWQ** and mixed-precision quantization (the ``ratio`` parameter). 
- from nncf import compress_weights, CompressWeightsMode - from optimum.intel.openvino import OVModelForCausalLM - from transformers import AutoTokenizer, pipeline - # Load model from Hugging Face - model_id = "HuggingFaceH4/zephyr-7b-beta" - model = OVModelForCausalLM.from_pretrained(model_id, export=True, load_in_8bit=False, compile=False) +**Hugging Face Optimum-Intel API** - # Compress to INT4 Symmetric - model.model = compress_weights(model.model, mode=CompressWeightsMode.INT4_SYM) +Hugging Face Optimum-Intel provides an easy way to use NNCF Weight Compression capabilities to optimize +various large Transformer models. Most of the options of the NNCF ``nncf.compress_weights()`` API are +exposed in the ``.from_pretrained()`` method of Optimum-Intel classes. Optimum also has several datasets +for data-aware quantization available out-of-the-box. +The example below shows data-free 4-bit weight quantization +applied on top of OpenVINO IR. Before trying the example, make sure Optimum Intel +is installed in your environment by running the following command: - # Inference - model.compile() - tokenizer = AutoTokenizer.from_pretrained(model_id) - pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) - phrase = "The weather is" - results = pipe(phrase) - print(results) +.. code-block:: python - .. tab-item:: Optimum-Intel + pip install optimum[openvino] - .. code-block:: python +.. 
code-block:: python - from optimum.intel.openvino import OVModelForCausalLM - from transformers import AutoTokenizer, pipeline + from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig + from transformers import AutoTokenizer, pipeline - # Load and compress model from Hugging Face - model_id = "HuggingFaceH4/zephyr-7b-beta" - model = OVModelForCausalLM.from_pretrained(model_id, export=True, load_in_8bit=True) + # Load and compress model from Hugging Face + model_id = "microsoft/Phi-3.5-mini-instruct" + model = OVModelForCausalLM.from_pretrained( + model_id, + export=True, + quantization_config=OVWeightQuantizationConfig( + bits=4, + quant_method="awq", + scale_estimation=True, + dataset="wikitext2", + group_size=64, + ratio=1.0 + ) + ) - # Inference - tokenizer = AutoTokenizer.from_pretrained(model_id) - pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) - phrase = "The weather is" - results = pipe(phrase) - print(results) + # Inference + tokenizer = AutoTokenizer.from_pretrained(model_id) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + phrase = "The weather is" + results = pipe(phrase) + print(results) Exporting and Loading Compressed Models @@ -309,12 +346,12 @@ load the compressed model later for faster time to first inference. .. 
code-block:: python # Save compressed model for faster loading later - model.save_pretrained("zephyr-7b-beta-int4-sym-ov") - tokenizer.save_pretrained("zephyr-7b-beta-int4-sym-ov") + model.save_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") + tokenizer.save_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") # Load a saved model - model = OVModelForCausalLM.from_pretrained("zephyr-7b-beta-int4-sym-ov") - tokenizer = AutoTokenizer.from_pretrained("zephyr-7b-beta-int4-sym-ov") + model = OVModelForCausalLM.from_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") + tokenizer = AutoTokenizer.from_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") GPTQ Models ############ diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst index 41d43f7eea37d6..aa8e9cdabfda64 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes.rst @@ -83,7 +83,7 @@ Accordingly, the code that loops over all available devices of the "GPU" type on Additional Resources #################### -* `OpenVINO™ Runtime API Tutorial <./../../notebooks/openvino-api-with-output.html>`__ -* `AUTO Device Tutorial <./../../notebooks/auto-device-with-output.html>`__ -* `GPU Device Tutorial <./../../notebooks/gpu-device-with-output.html>`__ -* `NPU Device Tutorial <./../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file +* `OpenVINO™ Runtime API Tutorial <../../notebooks/openvino-api-with-output.html>`__ +* `AUTO Device Tutorial <../../notebooks/auto-device-with-output.html>`__ +* `GPU Device Tutorial <../../notebooks/gpu-device-with-output.html>`__ +* `NPU Device Tutorial <../../notebooks/hello-npu-with-output.html>`__ \ No newline at end of file diff --git 
a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst index dc158fe9352042..46b541d84d4035 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device/performance-hint-and-thread-scheduling.rst @@ -63,19 +63,19 @@ the model precision and the ratio of P-cores and E-cores. Then the default settings for low-level performance properties on Windows and Linux are as follows: -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| Property | Windows | Linux | -+======================================+========================================================================+====================================================================+ -| ``ov::num_streams`` | 1 | 1 | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one socket | is equal to the number of P-cores or P-cores+E-cores on one socket | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | 
-+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_hyper_threading`` | No | No | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ -| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | -+--------------------------------------+------------------------------------------------------------------------+--------------------------------------------------------------------+ ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| Property | Windows | Linux | ++======================================+=======================================================================+=======================================================================+ +| ``ov::num_streams`` | 1 | 1 | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one numa node | is equal to the number of P-cores or P-cores+E-cores on one numa node | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint ` | :ref:`Core Type Table of Latency Hint ` | 
++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_hyper_threading`` | No | No | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together | ++--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+ .. note:: @@ -91,6 +91,16 @@ Then the default settings for low-level performance properties on Windows and Li enabled on Linux. Such default settings are aligned with typical workloads running in the corresponding environments to guarantee better out-of-the-box (OOB) performance. +.. note:: + + Starting from 5th Gen Intel Xeon Processors, a new microarchitecture enabled the new sub-NUMA clusters + feature. A sub-NUMA cluster (SNC) can create two or more localization domains (NUMA nodes) + within a socket by BIOS configuration. + By default, OpenVINO with the latency hint uses a single NUMA node for inference. Although such + behavior allows achieving the best performance for most models, there might be corner + cases which require manual tuning of the ``ov::num_streams`` and ``ov::hint::enable_hyper_threading`` parameters. + For more details, see `Sub-NUMA Clustering `__ + Throughput Hint ##################### @@ -187,3 +197,23 @@ are executed in parallel. For details on multi-stream execution check the :doc:`optimization guide <../../optimize-inference/optimizing-throughput/advanced_throughput_options>`. + +.. 
_Composability_of_different_threading_runtimes: + +Composability of different threading runtimes +############################################# + +OpenVINO is by default built with the `oneTBB `__ threading library. +oneTBB has a feature `worker_wait`, similar to `OpenMP `__ `busy-wait `__, which makes OpenVINO inference +threads wait actively for a while after a task is done. The intention is to avoid CPU inactivity in the +transition time between inference tasks. + +In a pipeline that runs OpenVINO inferences on the CPU along with other sequential application logic, using different threading runtimes (e.g., OpenVINO inferences use oneTBB, +while other application logic uses OpenMP) will cause both to occupy CPU cores for additional time after the task is done, leading to overhead. + +Recommended solutions: + +- The most effective way is to use oneTBB for all computations made in the pipeline. +- Rebuild OpenVINO with OpenMP if other application logic uses OpenMP. +- Limit the number of threads for OpenVINO and other parts and let the OS do the scheduling. +- If other application logic uses OpenMP, set the environment variable `OMP_WAIT_POLICY `__ to `PASSIVE` to disable OpenMP `busy-wait `__. 
diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst index 78cf0632f61b2b..b4e1c7ac15afcc 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/gpu-device.rst @@ -19,7 +19,7 @@ For an in-depth description of the GPU plugin, see: - `GPU plugin developer documentation `__ - `OpenVINO Runtime GPU plugin source files `__ -- `Accelerate Deep Learning Inference with Intel® Processor Graphics `__ +- `Start AI Development with Intel `__ The GPU plugin is a part of the Intel® Distribution of OpenVINO™ toolkit. For more information on how to configure a system to use it, see the :doc:`GPU configuration <../../../get-started/configurations/configurations-intel-gpu>`. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst index 26a09214ea462a..e45f51a37afa5e 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst @@ -119,9 +119,6 @@ The hints are used on the presumption that the application queries ``ov::optimal While an application is free to create more requests if needed (for example to support asynchronous inputs population) **it is very important to at least run the** ``ov::optimal_number_of_infer_requests`` **of the inference requests in parallel**. It is recommended for efficiency, or device utilization, reasons. -Keep in mind that ``ov::hint::PerformanceMode::LATENCY`` does not necessarily imply using single inference request. 
For example, multi-socket CPUs can deliver as many requests at the same minimal latency as the number of NUMA nodes in the system. -To make your application fully scalable, make sure to query the ``ov::optimal_number_of_infer_requests`` directly. - .. _prefer-async-api: Prefer Async API diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst index 7466d00efe5eb7..cad5633e11f85b 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-throughput/advanced_throughput_options.rst @@ -85,12 +85,12 @@ Number of Streams Considerations * Select the number of streams that is **less or equal** to the number of requests that the application would be able to run simultaneously. * To avoid wasting resources, the number of streams should be enough to meet the *average* parallel slack rather than the peak load. -* Use the `ov::streams::AUTO `__ as a more portable option (that also respects the underlying hardware configuration). +* Use the `ov::streams::AUTO <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv44AUTO>`__ as a more portable option (that also respects the underlying hardware configuration). * It is very important to keep these streams busy, by running as many inference requests as possible (for example, start the newly-arrived inputs immediately): - * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests `__ of the ``ov:Compiled_Model``. 
+ * A bare minimum of requests to saturate the device can be queried as the `ov::optimal_number_of_infer_requests <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv432optimal_number_of_infer_requests>`__ of the ``ov:Compiled_Model``. -* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams `__. +* *The maximum number of streams* for the device (per model) can be queried as the `ov::range_for_streams <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv417range_for_streams>`__. Batch Size Considerations +++++++++++++++++++++++++ @@ -99,7 +99,7 @@ Batch Size Considerations * Otherwise (or if the number of "available" requests fluctuates), you may need to keep several instances of the network (reshaped to the different batch size) and select the properly sized instance in the runtime accordingly. -* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size `__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. +* For OpenVINO devices that implement a dedicated heuristic internally, the `ov::optimal_batch_size <../../../../api/c_cpp_api/group__ov__runtime__cpp__prop__api.html#_CPPv418optimal_batch_size>`__ is a *device* property (that accepts the actual model as a parameter) to query the recommended batch size for the model. A Few Device-specific Details diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst index 86788b20249a3f..d00fd19c4d636d 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst @@ -139,5 +139,5 @@ sequences. 
You can find more examples demonstrating how to work with states in other articles: -* `LLM Chatbot notebook <../../notebooks/stable-zephyr-3b-chatbot-with-output.html>`__ +* `LLaVA-NeXT Multimodal Chatbot notebook <../../notebooks/llava-next-multimodal-chatbot-with-output.html>`__ * :doc:`Serving Stateful Models with OpenVINO Model Server <../../openvino-workflow/model-server/ovms_docs_stateful_models>` diff --git a/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst b/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst index 438c9ea9ec0bd3..3032add547f8a8 100644 --- a/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst +++ b/docs/articles_en/openvino-workflow/running-inference/string-tensors.rst @@ -201,6 +201,6 @@ Additional Resources * Learn about the :doc:`basic steps to integrate inference in your application `. -* Use `OpenVINO tokenizers `__ to produce models that use string tensors to work with textual information as pre- and post-processing for the large language models. +* Use `OpenVINO tokenizers `__ to produce models that use string tensors to work with textual information as pre- and post-processing for the large language models. -* Check out `GenAI Samples `__ to see how string tensors are used in real-life applications. +* Check out `GenAI Samples `__ to see how string tensors are used in real-life applications. diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index 6d874ff4d14be3..e5bc0ca901a5aa 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -20,6 +20,186 @@ By default, Torch code runs in eager-mode, but with the use of ``torch.compile`` How to Use #################### + +.. tab-set:: + + .. tab-item:: Image Generation + + .. tab-set:: + + .. tab-item:: Stable-Diffusion-2 + + .. 
code-block:: py + :force: + + import torch + from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler + + model_id = "stabilityai/stable-diffusion-2-1" + + # Use the DPMSolverMultistepScheduler (DPM-Solver++) scheduler here instead + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) + + + pipe.text_encoder = torch.compile(pipe.text_encoder, backend="openvino") #Optional + + pipe.unet = torch.compile(pipe.unet, backend="openvino") + + pipe.vae.decode = torch.compile(pipe.vae.decode, backend="openvino") #Optional + + prompt = "a photo of an astronaut riding a horse on mars" + image = pipe(prompt).images[0] + + image.save("astronaut_rides_horse.png") + + + .. tab-item:: Stable-Diffusion-3 + + .. code-block:: py + + import torch + from diffusers import StableDiffusion3Pipeline + + pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float32) + + + pipe.transformer = torch.compile(pipe.transformer, backend="openvino") + + image = pipe( + "A cat holding a sign that says hello world", + negative_prompt="", + num_inference_steps=28, + guidance_scale=7.0, + ).images[0] + + image.save('out.png') + + .. tab-item:: Stable-Diffusion-XL + + .. 
code-block:: py + + import torch + from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler + + unet = UNet2DConditionModel.from_pretrained("latent-consistency/lcm-sdxl", torch_dtype=torch.float16, variant="fp16") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=unet, torch_dtype=torch.float16, variant="fp16") + pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + + + pipe.text_encoder = torch.compile(pipe.text_encoder, backend="openvino") #Optional + + pipe.unet = torch.compile(pipe.unet, backend="openvino") + + pipe.vae.decode = torch.compile(pipe.vae.decode, backend="openvino") #Optional + + prompt = "a close-up picture of an old man standing in the rain" + image = pipe(prompt, num_inference_steps=5, guidance_scale=8.0).images[0] + image.save("result.png") + + .. tab-item:: Text Generation + + .. tab-set:: + + .. tab-item:: Llama-3.2-1B + + .. code-block:: py + + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + + model_name_or_path = "meta-llama/Llama-3.2-1B-Instruct" + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.float32) + model = AutoModelForCausalLM.from_pretrained( + model_name_or_path, + trust_remote_code=True, + device_map='cpu', + torch_dtype=torch.float32 + ) + + prompt = "Tell me about AI" + + + model.forward = torch.compile(model.forward, backend="openvino", options={'aot_autograd': True}) + + pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=64 + ) + result = pipe(prompt) + print(result[0]['generated_text']) + + + .. tab-item:: Llama-2-7B-GPTQ + + .. 
code-block:: py + + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + + model_name_or_path = "TheBloke/Llama-2-7B-GPTQ" + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.float32) + model = AutoModelForCausalLM.from_pretrained( + model_name_or_path, + trust_remote_code=True, + device_map='cpu', + torch_dtype=torch.float32 + ) + + prompt = "Tell me about AI" + + + model.forward = torch.compile(model.forward, backend="openvino", options={'aot_autograd': True}) + + pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=64 + ) + result = pipe(prompt) + print(result[0]['generated_text']) + + + .. tab-item:: Chatglm-4-GPTQ + + .. code-block:: py + + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer + + query = "tell me about AI" + + tokenizer = AutoTokenizer.from_pretrained("mcavus/glm-4v-9b-gptq-4bit-dynamo", trust_remote_code=True) + inputs = tokenizer.apply_chat_template([{"role": "user", "content": query}], + add_generation_prompt=True, + tokenize=True, + return_tensors="pt", + return_dict=True + ) + model = AutoModelForCausalLM.from_pretrained( + "mcavus/glm-4v-9b-gptq-4bit-dynamo", + torch_dtype=torch.float32, + low_cpu_mem_usage=True, + trust_remote_code=True + ) + + + model.transformer.encoder.forward = torch.compile(model.transformer.encoder.forward, backend="openvino", options={"aot_autograd":True}) + + gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1} + with torch.no_grad(): + outputs = model.generate(**inputs, **gen_kwargs) + outputs = outputs[:, inputs['input_ids'].shape[1]:] + print(tokenizer.decode(outputs[0], skip_special_tokens=True)) + + + + + + + + + + + + + + + + To use ``torch.compile``, you need to define the ``openvino`` backend in your PyTorch application. 
This way Torch FX subgraphs will be directly converted to OpenVINO representation without any additional PyTorch-based tracing/scripting. @@ -108,7 +288,7 @@ PyTorch supports ``torch.compile`` officially on Windows from version 2.3.0 onwa For PyTorch versions below 2.3.0, the ``torch.compile`` feature is not supported on Windows officially. However, it can be accessed by running the following instructions: -1. Install the PyTorch nightly wheel file - `2.1.0.dev20230713 `__ , +1. Install the PyTorch nightly wheel file - `2.1.0.dev20230713 `__ , 2. Update the file at ``/Lib/site-packages/torch/_dynamo/eval_frames.py`` 3. Find the function called ``check_if_dynamo_supported()``: @@ -194,7 +374,7 @@ The ``torch.compile`` feature is part of PyTorch 2.0, and is based on: (PEP 523) to dynamically modify Python bytecode right before it is executed (PyTorch operators that cannot be extracted to FX graph are executed in the native Python environment). It maintains the eager-mode capabilities using - `Guards `__ to ensure the + `Guards `__ to ensure the generated graphs are valid. * **AOTAutograd** - generates the backward graph corresponding to the forward graph captured by TorchDynamo. diff --git a/docs/dev/build_riscv64.md b/docs/dev/build_riscv64.md index 13ab9e98d56958..19d6e8714b45fc 100644 --- a/docs/dev/build_riscv64.md +++ b/docs/dev/build_riscv64.md @@ -16,15 +16,23 @@ The software was validated on the following devices: - Python 3.10 for OpenVINO Runtime Python API ## How to build +Currently, there are three ways to build OpenVINO Runtime for 64-bit RISC-V platforms: + +1. **Recommended**. The build with vectorized (using RVV instructions) primitives for limited scope of operations from [`SHL`](https://github.com/XUANTIE-RV/csi-nn2) using [`xuantie-gnu-toolchain`](https://github.com/XUANTIE-RV/). This GNU Compiler Toolchain supports RVV 0.7.1, ratified RVV 1.0 and Xuantie-specific instruction sets. The vector intrinsics don't use the common prefix `__riscv_`. 
This method provides the best performance available at the moment. +2. The build without optimized primitives using [`riscv-gnu-toolchain`](https://github.com/riscv-collab/riscv-gnu-toolchain.git). This GNU Compiler Toolchain supports RVV 0.7.1 and ratified RVV 1.0. The vector intrinsics use the common prefix `__riscv_`. However, as mentioned earlier, this build method doesn't yet provide optimized primitives implemented using the RVV intrinsics. +3. The build without optimized primitives using installed Linux packages. The compilers in these packages don't support RVV intrinsics. + +### Steps + 0. Prerequisite: -- For target with RVV - build `xuantie-gnu-toolchain` and `qemu`: +- For target with vectorized primitives from `SHL` - build `xuantie-gnu-toolchain` and `qemu`: ```sh git clone https://github.com/XUANTIE-RV/xuantie-gnu-toolchain.git cd xuantie-gnu-toolchain ./configure --prefix= make linux build-qemu -j$(nproc) ``` -- For target without RVV - build `riscv-gnu-toolchain`: +- For target without optimized primitives using `riscv-gnu-toolchain`: ```sh git clone https://github.com/riscv-collab/riscv-gnu-toolchain.git cd riscv-gnu-toolchain @@ -32,6 +40,11 @@ The software was validated on the following devices: make linux build-qemu -j$(nproc) ``` > **NOTE**: The `build-qemu` target is optional, as it is used to build the `qemu` simulator. However, it is recommended to build the `qemu` simulator, since it is much more convenient to validate the software on your host than on your devices. More information can be seen [here](https://github.com/riscv-collab/riscv-gnu-toolchain). +- For target without optimized primitives using installed Linux packages: + ```sh + apt-get update + apt-get install -y gcc-riscv64-linux-gnu g++-riscv64-linux-gnu binutils-riscv64-linux-gnu + ``` 1. Clone OpenVINO repository and init submodules: ```sh @@ -50,8 +63,8 @@ The software was validated on the following devices: mkdir build && cd build ``` -4. 
To cross compile OpenVINO Runtime for RISC-V devices, run `cmake` with specified `CMAKE_TOOLCHAIN_FILE` and `RISCV_TOOLCHAIN_ROOT`. -- For target with RVV: +4. To cross compile OpenVINO Runtime for RISC-V devices, run `cmake` with specified `CMAKE_TOOLCHAIN_FILE` and `RISCV_TOOLCHAIN_ROOT` (the last one is needed only for build using GNU toolchain). +- For target with vectorized primitives from `SHL`: ```sh cmake .. \ -DCMAKE_BUILD_TYPE=Release \ @@ -59,8 +72,8 @@ The software was validated on the following devices: -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/ \ -DRISCV_TOOLCHAIN_ROOT= ``` - > **NOTE**: To build OpenVINO Runtime for different versions of RVV, you just need to specify corresponding toolchain files. For exmaple, you can replace `` with `riscv64-071-thead-gnu.toolchain.cmake` for RVV 0.7.1 and `riscv64-100-thead-gnu.toolchain.cmake` for RVV 1.0 respectively. -- For target without RVV: + > **NOTE**: To build OpenVINO Runtime for different versions of RVV, you just need to specify corresponding toolchain files. For example, you can replace `` with `riscv64-071-xuantie-gnu.toolchain.cmake` for RVV 0.7.1 and `riscv64-100-xuantie-gnu.toolchain.cmake` for RVV 1.0 respectively. +- For target without optimized primitives using `riscv-gnu-toolchain`: ```sh cmake .. \ -DCMAKE_BUILD_TYPE=Release \ @@ -69,7 +82,13 @@ The software was validated on the following devices: -DRISCV_TOOLCHAIN_ROOT=/opt/riscv ``` > **NOTE**: The `riscv-gnu-toolchain` is build as there are essential files used for cross compilation under `/opt/riscv/sysroot`. The latest stable versions of Clang or GCC both support compiling source code into RISC-V instructions, so it is acceptable to choose your preferable compilers by specifying `-DCMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`. But remember to add the key `-DCMAKE_SYSROOT=/opt/riscv/sysroot`, otherwise many fundamental headers and libs could not be found during cross compilation. 
- +- For target without optimized primitives using installed Linux packages: + ```sh + cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX= \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/riscv64.linux.toolchain.cmake + ``` > **NOTE**: By default OpenVINO is built with OpenMP support on RISC-V devices. Then run `make` to build the project: diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index 10049485202cca..4a9761f5364046 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -25,7 +25,7 @@ Supported configurations: ```sh git clone https://github.com/openvinotoolkit/openvino.git cd openvino - git submodule update --init + git submodule update --init --recursive ``` 2. Create build directory: diff --git a/docs/dev/ci/commit_signoff_policy.md b/docs/dev/ci/commit_signoff_policy.md new file mode 100644 index 00000000000000..0328d3c3ec308c --- /dev/null +++ b/docs/dev/ci/commit_signoff_policy.md @@ -0,0 +1,74 @@ +# How to sign-off commits + +We require a sign-off commit message in the following format on each commit in pull request. + +``` +This is a commit message. + +Signed-off-by: Author Name +``` + +## How to sign-off new commits + +In a local Git environment, the sign-off message can be added to a commit either manually (as a text) +or via the **-s** flag used with the “git commit” command, for example: + +`git commit -s -m "My commit message"` + +To avoid manually adding the flag for each commit, we recommend setting up a Git hook with the following steps: + +1. Navigate to the `/.git/hooks` folder. +2. 
Open the `prepare-commit-msg.sample` file and paste the following content: + +``` +#!/bin/sh + +COMMIT_MSG_FILE=$1 +COMMIT_SOURCE=$2 +SHA1=$3 + +NAME=$(git config user.name) +EMAIL=$(git config user.email) + +if [ -z "$NAME" ]; then + echo "empty git config user.name" + exit 1 +fi + +if [ -z "$EMAIL" ]; then + echo "empty git config user.email" + exit 1 +fi + +git interpret-trailers --if-exists doNothing --trailer \ + "Signed-off-by: $NAME <$EMAIL>" \ + --in-place "$1" +``` + +3. Save the file with the name `prepare-commit-msg` (remove the .sample extension). +4. Make the file executable (on Linux / Git Bash: `chmod +x /.git/hooks/prepare-commit-msg`). + +**Note**: For both sign-off approaches, ensure your user name and email address are configured in Git first: + +``` +git config user.name 'FIRST_NAME LAST_NAME' +git config user.email 'MY_EMAIL@example.com' +``` + +### Sign-off web-based commits + +To enable automatic sign-off of commits made via GitHub web interface, make sure that +[Require contributors to sign off on web-based commits](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/managing-the-commit-signoff-policy-for-your-repository#enabling-or-disabling-compulsory-commit-signoffs-for-your-repository) +setting is selected in the Settings menu of your OpenVINO repository fork. + +## How to sign-off older commits in the history + +If you forget to add the sign-off to your last commit, you can amend it and force-push to GitHub: + +``` +git commit --amend --signoff +``` + +To sign off on even older commits, use an interactive rebase, edit unsigned commits, and execute +`git commit --amend --signoff` for each. However, please note that if others have already started working based on +the commits in this branch, this will rewrite history and may cause issues for collaborators. 
diff --git a/docs/documentation_build_instructions.md b/docs/documentation_build_instructions.md index d9219454b86a19..a1412cfc3f358d 100644 --- a/docs/documentation_build_instructions.md +++ b/docs/documentation_build_instructions.md @@ -26,7 +26,7 @@ $ source env/bin/activate ``` 5. Install the sphinx theme ``` -(env) $ cd docs/openvino_sphinx_theme && python setup.py install && cd - +(env) $ python -m pip install docs/openvino_sphinx_theme `````` 6. Install the custom sphinx sitemap ``` diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index c6965d054f0991..bfad4b042e5359 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241007220823/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241104220807/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index cdb1e4ff5083cd..f39aa93b36851d 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -11,14 +11,14 @@ of this notebook, you will see live inference results from your webcam out the algorithms. **Make sure you have properly installed the**\ `Jupyter extension `__\ **and -been using JupyterLab to run the demo as suggested in the** -``README.md`` +been using JupyterLab to run the demo as suggested in the +``README.md``** **NOTE**: *To use a webcam, you must run this Jupyter notebook on a computer with a webcam. 
If you run on a remote server, the webcam will not work. However, you can still do inference on a video file in - the final step. This demo utilizes the Python interface in* - ``Three.js`` *integrated with WebGL to process data from the model + the final step. This demo utilizes the Python interface in + ``Three.js`` integrated with WebGL to process data from the model inference. These results are processed and displayed in the notebook.* @@ -87,13 +87,13 @@ Prerequisites -**The** ``pythreejs`` **extension may not display properly when using a +**The ``pythreejs`` extension may not display properly when using a Jupyter Notebook release. Therefore, it is recommended to use Jupyter Lab instead.** .. code:: ipython3 - %pip install pythreejs "openvino-dev>=2024.0.0" "opencv-python" "torch" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install pythreejs "openvino>=2024.4.0" "opencv-python" "torch" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: @@ -101,85 +101,75 @@ Lab instead.** Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu Collecting pythreejs Using cached pythreejs-2.4.2-py3-none-any.whl.metadata (5.4 kB) - Collecting openvino-dev>=2024.0.0 - Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl.metadata (16 kB) + Collecting openvino>=2024.4.0 + Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) Collecting opencv-python Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB) Collecting torch Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) - Collecting onnx<1.16.2 - Using cached onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Collecting tqdm + Using cached tqdm-4.66.6-py3-none-any.whl.metadata (57 kB) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) - Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) - Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) - Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) - Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Collecting openvino-telemetry>=2023.2.1 (from openvino>=2024.4.0) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) 
(24.1) - Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) - Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) - Collecting openvino==2024.4.0 (from openvino-dev>=2024.0.0) - Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.3 kB) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.1) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Collecting networkx (from torch) + Using cached https://download.pytorch.org/whl/networkx-3.2.1-py3-none-any.whl (1.6 MB) + Requirement already satisfied: jinja2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) - Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB) - Collecting protobuf>=3.20.2 (from onnx<1.16.2) - Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) + Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already 
satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + INFO: pip is looking at multiple versions of networkx to determine which version is compatible with other requirements. This could take a while. + Collecting networkx (from torch) + Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) - Using cached openvino_dev-2024.4.0-16579-py3-none-any.whl (4.7 MB) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) - Using cached 
onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB) + Using cached tqdm-4.66.6-py3-none-any.whl (78 kB) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) - Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) - Using cached protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl (316 kB) Using cached filelock-3.16.1-py3-none-any.whl (16 kB) - Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB) + Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB) + Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) - Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs - Successfully installed filelock-3.16.1 fsspec-2024.9.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.1 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-dev-2024.4.0 openvino-telemetry-2024.1.0 protobuf-5.28.2 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu traittypes-0.2.1 + Installing collected packages: openvino-telemetry, mpmath, traittypes, tqdm, sympy, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, ipydatawidgets, pythreejs + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.1.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.66.6 traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. 
@@ -193,28 +183,28 @@ Imports import collections import time from pathlib import Path - + import cv2 import ipywidgets as widgets import numpy as np from IPython.display import clear_output, display import openvino as ov - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) with open("notebook_utils.py", "w") as f: f.write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", ) with open("engine3js.py", "w") as f: f.write(r.text) - + import notebook_utils as utils import engine3js as engine @@ -228,41 +218,31 @@ Download the model -We use ``omz_downloader``, which is a command line tool from the -``openvino-dev`` package. ``omz_downloader`` automatically creates a -directory structure and downloads the selected model. - .. code:: ipython3 + from notebook_utils import download_file + import tarfile + + # directory where model will be downloaded - base_model_dir = "model" - - # model name as named in Open Model Zoo - model_name = "human-pose-estimation-3d-0001" - # selected precision (FP32, FP16) - precision = "FP32" - - BASE_MODEL_NAME = f"{base_model_dir}/public/{model_name}/{model_name}" - model_path = Path(BASE_MODEL_NAME).with_suffix(".pth") - onnx_path = Path(BASE_MODEL_NAME).with_suffix(".onnx") - - ir_model_path = Path(f"model/public/{model_name}/{precision}/{model_name}.xml") - model_weights_path = Path(f"model/public/{model_name}/{precision}/{model_name}.bin") + base_model_dir = Path("model") + + download_file( + "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/human-pose-estimation-3d-0001/human-pose-estimation-3d.tar.gz", + directory=base_model_dir, + ) + + ckpt_file = base_model_dir / "human-pose-estimation-3d-0001.pth" + + if not ckpt_file.exists(): + with tarfile.open(base_model_dir / "human-pose-estimation-3d.tar.gz") 
as f: + f.extractall(base_model_dir) - if not model_path.exists(): - download_command = f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}" - ! $download_command .. parsed-literal:: - ################|| Downloading human-pose-estimation-3d-0001 ||################ - - ========== Downloading model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - - - ========== Unpacking model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - + model/human-pose-estimation-3d.tar.gz: 0%| | 0.00/17.6M [00:00 0 - + pose[0], pose[1] = ( pose[0] * frame.shape[1] / scaled_img.shape[1], pose[1] * frame.shape[0] / scaled_img.shape[0], ) - + # Draw joints. for edge in body_edges_2d: if was_found[edge[0]] and was_found[edge[1]]: @@ -512,7 +462,7 @@ from Open Model Zoo. -1, cv2.LINE_AA, ) - + return frame Main Processing Function @@ -529,18 +479,18 @@ webcam feed or a video file. """ 2D image as input, using OpenVINO as inference backend, get joints 3D coordinates, and draw 3D human skeleton in the scene - + :param source: The webcam number to feed the video stream with primary webcam set to "0", or the video path. :param flip: To be used by VideoPlayer function for flipping capture image. :param use_popup: False for showing encoded frames over this notebook, True for creating a popup window. :param skip_frames: Number of frames to skip at the beginning of the video. """ - + focal_length = -1 # default stride = 8 player = None skeleton_set = None - + try: # create video player to play with target fps video_path # get the frame from camera @@ -548,16 +498,16 @@ webcam feed or a video file. 
player = utils.VideoPlayer(source, flip=flip, fps=30, skip_first_frames=skip_frames) # start capturing player.start() - + input_image = player.next() # set the window size resize_scale = 450 / input_image.shape[1] windows_width = int(input_image.shape[1] * resize_scale) windows_height = int(input_image.shape[0] * resize_scale) - + # use visualization library engine3D = engine.Engine3js(grid=True, axis=True, view_width=windows_width, view_height=windows_height) - + if use_popup: # display the 3D human pose in this notebook, and origin frame in popup window display(engine3D.renderer) @@ -567,43 +517,43 @@ webcam feed or a video file. # set the 2D image box, show both human pose and image in the notebook imgbox = widgets.Image(format="jpg", height=windows_height, width=windows_width) display(widgets.HBox([engine3D.renderer, imgbox])) - + skeleton = engine.Skeleton(body_edges=body_edges) - + processing_times = collections.deque() - + while True: # grab the frame frame = player.next() if frame is None: print("Source ended") break - + # resize image and change dims to fit neural network input # (see https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/human-pose-estimation-3d-0001) scaled_img = cv2.resize(frame, dsize=(model.inputs[0].shape[3], model.inputs[0].shape[2])) - + if focal_length < 0: # Focal length is unknown focal_length = np.float32(0.8 * scaled_img.shape[1]) - + # inference start start_time = time.time() # get results inference_result = model_infer(scaled_img, stride) - + # inference stop stop_time = time.time() processing_times.append(stop_time - start_time) # Process the point to point coordinates of the data poses_3d, poses_2d = engine.parse_poses(inference_result, 1, stride, focal_length, True) - + # use processing times from last 200 frames if len(processing_times) > 200: processing_times.popleft() - + processing_time = np.mean(processing_times) * 1000 fps = 1000 / processing_time - + if len(poses_3d) > 0: # From here, you can 
rotate the 3D point positions using the function "draw_poses", # or you can directly make the correct mapping below to properly display the object image on the screen @@ -616,28 +566,28 @@ webcam feed or a video file. -y + np.ones(poses_3d[:, 2::4].shape) * 100, -x, ) - + poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3] people = skeleton(poses_3d=poses_3d) - + try: engine3D.scene_remove(skeleton_set) except Exception: pass - + engine3D.scene_add(people) skeleton_set = people - + # draw 2D frame = draw_poses(frame, poses_2d, scaled_img, use_popup) - + else: try: engine3D.scene_remove(skeleton_set) skeleton_set = None except Exception: pass - + cv2.putText( frame, f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", @@ -648,7 +598,7 @@ webcam feed or a video file. 1, cv2.LINE_AA, ) - + if use_popup: cv2.imshow(title, frame) key = cv2.waitKey(1) @@ -662,9 +612,9 @@ webcam feed or a video file. frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90], )[1].tobytes() - + engine3D.renderer.render(engine3D.scene, engine3D.cam) - + except KeyboardInterrupt: print("Interrupted") except RuntimeError as e: @@ -711,10 +661,10 @@ picture on the left to interact. .. 
code:: ipython3 USE_WEBCAM = False - + cam_id = 0 video_path = "https://storage.openvinotoolkit.org/data/test_data/videos/face-demographics-walking.mp4" - + source = cam_id if USE_WEBCAM else video_path - + run_pose_estimation(source=source, flip=isinstance(source, int), use_popup=False) diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index ce27c3006ac36a..e60951d40c75f9 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -46,20 +46,12 @@ Guide =2023.1.0" "tqdm" - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -227,7 +219,7 @@ chair for example. .. parsed-literal:: - /tmp/ipykernel_59833/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_497205/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -321,7 +313,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_59833/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_497205/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. 
Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png index 2bb95f87f90515..1821f275db1019 100644 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9ed14955798d0fbcdc284da02f19a12ae92c89fe6c1f4760951a414f4047b66 -size 68016 +oid sha256:31cb7026c28d1308b88f61a6939b7e11a54948c3cb3e4f7a1a1b8a038871150f +size 68999 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index 62e4b205f45f75..5cfa565a07d239 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -8,6 +8,7 @@ notebooks/auto-device/auto-device.ipynb notebooks/bark-text-to-audio/bark-text-to-audio.ipynb notebooks/big-transfer-quantization/tensorflow-bit-image-classification-nncf-quantization.ipynb notebooks/blip-visual-language-processing/blip-visual-language-processing.ipynb +notebooks/catvton/catvton.ipynb notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb notebooks/clip-zero-shot-image-classification/clip-zero-shot-classification.ipynb notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb @@ -54,11 +55,12 @@ notebooks/language-quantize-bert/language-quantize-bert.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-image-generation.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb 
-notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb -notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb +notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb +notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-optimum.ipynb notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +notebooks/llm-agent-react/llm-agent-react.ipynb notebooks/llm-agent-react/llm-agent-react-langchain.ipynb notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb notebooks/llm-chatbot/llm-chatbot.ipynb @@ -144,11 +146,11 @@ notebooks/tensorflow-hub/tensorflow-hub.ipynb notebooks/tensorflow-object-detection-to-openvino/tensorflow-instance-segmentation-to-openvino.ipynb notebooks/tensorflow-object-detection-to-openvino/tensorflow-object-detection-to-openvino.ipynb notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb +notebooks/text-to-image-genai/text-to-image-genai.ipynb notebooks/tflite-selfie-segmentation/tflite-selfie-segmentation.ipynb notebooks/tflite-to-openvino/tflite-to-openvino.ipynb notebooks/tiny-sd-image-generation/tiny-sd-image-generation.ipynb notebooks/torchvision-zoo-to-openvino/convnext-classification.ipynb -notebooks/triposr-3d-reconstruction/triposr-3d-reconstruction.ipynb notebooks/typo-detector/typo-detector.ipynb notebooks/vehicle-detection-and-recognition/vehicle-detection-and-recognition.ipynb notebooks/vision-background-removal/vision-background-removal.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst index 880bf539025c5e..aafda311c34c45 100644 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst @@ -226,23 +226,24 @@ Convert the Text Encoder 
.. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:861: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:813: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:866: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:836: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:889: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:839: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:892: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:935: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:988: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1426: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1486: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -351,13 +352,13 @@ suitable. This function repeats part of ``AmusedPipeline``. .. parsed-literal:: - /tmp/ipykernel_60662/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_498025/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -490,7 +491,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -696,7 +697,7 @@ model. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino @@ -707,7 +708,7 @@ model. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -760,17 +761,17 @@ model. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: 
RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) @@ -794,7 +795,7 @@ Demo generation with quantized pipeline .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. 
Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -878,7 +879,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -890,7 +891,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) return kl.mean(), kl.std() @@ -907,8 +908,8 @@ a rough estimate of generation quality. .. 
parsed-literal:: - Quantized pipeline Inception Score: 11.073053359985352 - Quantization speed-up: 2.07x + Quantized pipeline Inception Score: 11.0730562210083 + Quantization speed-up: 2.08x Interactive inference diff --git a/docs/notebooks/animate-anyone-with-output.rst b/docs/notebooks/animate-anyone-with-output.rst index 15459596dea5bf..a7debae86cef3f 100644 --- a/docs/notebooks/animate-anyone-with-output.rst +++ b/docs/notebooks/animate-anyone-with-output.rst @@ -1,7 +1,8 @@ Image-to-Video synthesis with AnimateAnyone and OpenVINO ======================================================== -|image0| +.. image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif + `AnimateAnyone `__ tackles the task of generating animation sequences from a single character image. It @@ -36,9 +37,7 @@ repo `__ and .. warning:: - - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. - + This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. **Table of contents:** @@ -71,9 +70,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif - - Prerequisites ------------- @@ -104,12 +100,6 @@ Prerequisites %load_ext skip_kernel_extension - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - - Note that we clone a fork of original repo with tweaked forward methods. ..
code:: ipython3 @@ -164,11 +154,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -192,7 +182,7 @@ Note that we clone a fork of original repo with tweaked forward methods. .. 
parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino Prepare base model @@ -216,13 +206,6 @@ Prepare base model local_dir=local_dir, ) - - -.. parsed-literal:: - - diffusion_pytorch_model.bin: 0%| | 0.00/3.44G [00:00:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :6: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - - Convert model to OpenVINO IR ---------------------------- @@ -430,7 +324,7 @@ semantic features are extracted through the CLIP image encoder for Cross-Attention. Temporal-Attention operates in the temporal dimension. Finally, the VAE decoder decodes the result into a video clip. -|image01| +.. image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png The pipeline contains 6 PyTorch modules: @@ -470,8 +364,6 @@ compression parameters. More details about weights compression can be found in `OpenVINO documentation `__. -.. |image01| image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png - .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -529,12 +421,14 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: + WARNING:nncf:NNCF provides best results with torch==2.1.2, while current torch version is 2.2.2+cpu. 
If you encounter issues, consider switching to torch==2.1.2 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (32 / 32) │ 100% (32 / 32) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (32 / 32) | 100% (32 / 32) | + +--------------+---------------------------+-----------------------------------+ @@ -550,6 +444,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -575,11 +477,12 @@ of the pipeline, it will be better to convert them to separate models. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (40 / 40) | 100% (40 / 40) | + +--------------+---------------------------+-----------------------------------+ @@ -595,6 +498,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + Reference UNet ~~~~~~~~~~~~~~ @@ -641,11 +552,12 @@ step. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (270 / 270) │ 100% (270 / 270) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (270 / 270) | 100% (270 / 270) | + +--------------+---------------------------+-----------------------------------+ @@ -661,6 +573,14 @@ step. + + + + + + + + Denoising UNet ~~~~~~~~~~~~~~ @@ -734,11 +654,12 @@ step. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (534 / 534) │ 100% (534 / 534) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (534 / 534) | 100% (534 / 534) | + +--------------+---------------------------+-----------------------------------+ @@ -754,6 +675,14 @@ step. + + + + + + + + Pose Guider ~~~~~~~~~~~ @@ -780,11 +709,12 @@ efficiently integrate pose control signals into the denoising process. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (8 / 8) │ 100% (8 / 8) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (8 / 8) | 100% (8 / 8) | + +--------------+---------------------------+-----------------------------------+ @@ -800,6 +730,14 @@ efficiently integrate pose control signals into the denoising process. 
+ + + + + + + + Image Encoder ~~~~~~~~~~~~~ @@ -825,18 +763,19 @@ required for both reference and denoising UNets. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (146 / 146) │ 100% (146 / 146) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (146 / 146) | 100% (146 / 146) | + +--------------+---------------------------+-----------------------------------+ @@ -852,6 +791,14 @@ required for both reference and denoising UNets. + + + + + + + + Inference --------- @@ -877,6 +824,15 @@ For starting work, please select inference device from dropdown list. device = device_widget() + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=5, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'GPU.3', 'AUTO'), value='A… + + + .. 
code:: ipython3 class OVPose2VideoPipeline(Pose2VideoPipeline): @@ -1174,7 +1130,7 @@ Video post-processing .. raw:: html @@ -1248,23 +1204,9 @@ Interactive inference demo = make_demo(fn=generate) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(debug=False, share=True) + demo.queue().launch(debug=True, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/" - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 135554acc38de4..9f9130a4fe0db2 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -58,22 +58,8 @@ Imports .. code:: ipython3 - import platform - %pip install -q "openvino>=2023.1.0" - %pip install -q opencv-python - if platform.system() != "windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + %pip install -q opencv-python "matplotlib>=3.4" .. code:: ipython3 @@ -126,14 +112,12 @@ the person in each frame of the video. 
################|| Downloading person-detection-0202 ||################ - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.xml - + ========== Retrieving model/intel/person-detection-0202/FP16/person-detection-0202.xml from the cache - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.bin + ========== Retrieving model/intel/person-detection-0202/FP16/person-detection-0202.bin from the cache - Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -355,8 +339,8 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 63.95 fps - + average throuput in sync mode: 55.59 fps + Async Mode ~~~~~~~~~~ @@ -494,8 +478,8 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 108.46 fps - + average throuput in async mode: 75.17 fps + Compare the performance ~~~~~~~~~~~~~~~~~~~~~~~ @@ -637,5 +621,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 144.01 fps - + average throughput in async mode with async infer queue: 103.81 fps + diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index f80b64476e19ea..3694e16797b727 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b164e09df4e90dc87d63caf35cc832021fbd147354a5300605164fce212e36b8 -size 29453 +oid sha256:fd71c3c8d066b8a16264c215b773c6200e981634ac24d04db99b0288d1ea1cca +size 30409 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index 5f7d8dfc61502f..2ebcbe7d80deb2 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -82,18 +82,15 @@ Import modules and create Core import platform # Install required packages - %pip install -q "openvino>=2023.1.0" "numpy<2" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.1.0" "matplotlib>=3.4" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -200,16 +197,16 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [23:32:04.6480]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [23:32:04.6480]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [23:32:04.6481]I[schedule.cpp:17][AUTO] scheduler starting - [23:32:04.6481]I[auto_schedule.cpp:181][AUTO] select device:CPU - [23:32:04.7787]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 130.622171 ms - [23:32:04.7789]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [23:32:04.7790]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [22:41:57.1267]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [22:41:57.1268]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [22:41:57.1268]I[schedule.cpp:17][AUTO] scheduler starting + [22:41:57.1269]I[auto_schedule.cpp:181][AUTO] select device:CPU + [22:41:57.2582]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 131.300219 ms + [22:41:57.2583]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [22:41:57.2584]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -223,7 +220,7 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: Deleted compiled_model - [23:32:04.7847]I[schedule.cpp:308][AUTO] scheduler ending + [22:41:57.2639]I[schedule.cpp:308][AUTO] scheduler ending Explicitly pass AUTO as device_name to Core::compile_model API @@ -381,7 +378,7 @@ executed on CPU until GPU is ready. .. parsed-literal:: - Time to load model using AUTO device and get first inference: 0.13 seconds. + Time to load model using AUTO device and get first inference: 0.12 seconds. .. code:: ipython3 @@ -556,12 +553,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 183.87fps, latency: 31.26ms, time interval: 10.01s - throughput: 184.60fps, latency: 31.70ms, time interval: 10.00s - throughput: 183.24fps, latency: 31.93ms, time interval: 10.01s - throughput: 184.05fps, latency: 31.75ms, time interval: 10.00s - throughput: 184.40fps, latency: 31.77ms, time interval: 10.00s - throughput: 178.41fps, latency: 32.83ms, time interval: 10.02s + throughput: 179.70fps, latency: 32.12ms, time interval: 10.00s + throughput: 183.61fps, latency: 31.86ms, time interval: 10.01s + throughput: 183.96fps, latency: 31.88ms, time interval: 10.01s + throughput: 183.98fps, latency: 31.91ms, time interval: 10.00s + throughput: 183.26fps, latency: 31.98ms, time interval: 10.01s + throughput: 183.40fps, latency: 32.01ms, time interval: 10.00s Done @@ -607,12 +604,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 140.52fps, latency: 6.62ms, time interval: 10.01s - throughput: 142.84fps, latency: 6.60ms, time interval: 10.00s - throughput: 142.14fps, latency: 6.60ms, time interval: 10.00s - throughput: 142.63fps, latency: 6.60ms, time interval: 10.00s - throughput: 143.11fps, latency: 6.61ms, time interval: 10.01s - throughput: 
132.99fps, latency: 7.13ms, time interval: 10.01s + throughput: 130.56fps, latency: 7.18ms, time interval: 10.00s + throughput: 142.51fps, latency: 6.61ms, time interval: 10.01s + throughput: 142.47fps, latency: 6.62ms, time interval: 10.00s + throughput: 142.46fps, latency: 6.61ms, time interval: 10.00s + throughput: 142.63fps, latency: 6.61ms, time interval: 10.00s + throughput: 142.73fps, latency: 6.60ms, time interval: 10.00s Done diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index 5ef6531526d989..cc037738f18096 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cf9ec5f2d8e34510d31d10190c4d7f269bb83a2800891ff865e09ee85e80d95 -size 27103 +oid sha256:1bedd8ff3e65a23fb4af380958a261d0916d2e0134b9426652a2779bdc06d6de +size 26887 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index edc7de70aeb565..21be57ac89d68d 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed265008638a6c38fbfc8397e6bb20d0e1fc900df893f30b7238c518d2380b2b -size 40093 +oid sha256:ed1ab24c30040707a36155169f4aaa91a5bff6cb48a2c5d10401ecbd87ca6f54 +size 40117 diff --git a/docs/notebooks/catvton-with-output.rst b/docs/notebooks/catvton-with-output.rst new file mode 100644 index 00000000000000..a7a9a04359f338 --- /dev/null +++ b/docs/notebooks/catvton-with-output.rst @@ -0,0 +1,360 @@ +Virtual Try-On with CatVTON and OpenVINO +======================================== + 
+Virtual try-on methods based on diffusion models achieve realistic +try-on effects but replicate the backbone network as a ReferenceNet or +leverage additional image encoders to process condition inputs, +resulting in high training and inference costs. `In this +work `__, authors rethink the necessity +of ReferenceNet and image encoders and innovate the interaction between +garment and person, proposing CatVTON, a simple and efficient virtual +try-on diffusion model. It facilitates the seamless transfer of in-shop +or worn garments of arbitrary categories to target persons by simply +concatenating them in spatial dimensions as inputs. The efficiency of +the model is demonstrated in three aspects: 1. Lightweight network. Only +the original diffusion modules are used, without additional network +modules. The text encoder and cross attentions for text injection in the +backbone are removed, further reducing the parameters by 167.02M. 2. +Parameter-efficient training. We identified the try-on relevant modules +through experiments and achieved high-quality try-on effects by training +only 49.57M parameters (∼5.51% of the backbone network’s parameters). 3. +Simplified inference. CatVTON eliminates all unnecessary conditions and +preprocessing steps, including pose estimation, human parsing, and text +input, requiring only garment reference, target person image, and mask +for the virtual try-on process. Extensive experiments demonstrate that +CatVTON achieves superior qualitative and quantitative results with +fewer prerequisites and trainable parameters than baseline methods. +Furthermore, CatVTON shows good generalization in in-the-wild scenarios +despite using open-source datasets with only 73K samples. + +Teaser image from `CatVTON +GitHub `__ |teaser| + +In this tutorial we consider how to convert and run this model using +OpenVINO. 
+ + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert the model to OpenVINO + IR <#convert-the-model-to-openvino-ir>`__ +- `Compiling models <#compiling-models>`__ +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. |teaser| image:: https://github.com/Zheng-Chong/CatVTON/blob/edited/resource/img/teaser.jpg?raw=true + +Prerequisites +------------- + + + +.. code:: ipython3 + + import platform + + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + %pip install -q "openvino>=2024.4" + %pip install -q "torch>=2.1" "diffusers>=0.29.1" torchvision opencv_python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q fvcore "pillow" "tqdm" "gradio>=4.36" "omegaconf==2.4.0.dev3" av pycocotools cloudpickle scipy accelerate "transformers>=4.27.3" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/aleksandr-mokrov/openvino_notebooks/refs/heads/catvton/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + + + +.. parsed-literal:: + + 741 + + + +.. code:: ipython3 + + from cmd_helper import clone_repo + + + clone_repo("https://github.com/Zheng-Chong/CatVTON.git", "3b795364a4d2f3b5adb365f39cdea376d20bc53c") + + + + +.. 
parsed-literal:: + + PosixPath('CatVTON') + + + +Convert the model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation (IR). `OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.compile_model``. + +``ov_catvton_helper.py`` script contains helper functions for model +downloading and model conversion; please check its content if you are +interested in conversion details. + +To download checkpoints and load models, just call the helper function +``download_models``. It takes care of it. Functions +``convert_pipeline_models`` and ``convert_automasker_models`` will +convert models from pipeline and ``automasker`` to OpenVINO format. + +The original pipeline contains VAE encoder and decoder and UNET. +|CatVTON-overview| + +The ``automasker`` contains ``DensePose`` with +``detectron2.GeneralizedRCNN`` model and ``SCHP`` (``LIP`` and ``ATR`` +version). + +.. |CatVTON-overview| image:: https://github.com/user-attachments/assets/e35c8dab-1c54-47b1-a73b-2a62e6cdca7c + +..
code:: ipython3 + + from pathlib import Path + + from ov_catvton_helper import download_models, convert_pipeline_models, convert_automasker_models + + + MODEL_DIR = Path("models") + VAE_ENCODER_PATH = MODEL_DIR / "vae_encoder.xml" + VAE_DECODER_PATH = MODEL_DIR / "vae_decoder.xml" + UNET_PATH = MODEL_DIR / "unet.xml" + DENSEPOSE_PROCESSOR_PATH = MODEL_DIR / "densepose_processor.xml" + SCHP_PROCESSOR_ATR = MODEL_DIR / "schp_processor_atr.xml" + SCHP_PROCESSOR_LIP = MODEL_DIR / "schp_processor_lip.xml" + + + pipeline, mask_processor, automasker = download_models(MODEL_DIR) + convert_pipeline_models(pipeline, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) + convert_automasker_models(automasker, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) + + +.. parsed-literal:: + + Note: switching to '3b795364a4d2f3b5adb365f39cdea376d20bc53c'. + + You are in 'detached HEAD' state. You can look around, make experimental + changes and commit them, and you can discard any commits you make in this + state without impacting any branches by switching back to a branch. + + If you want to create a new branch to retain commits you create, you may + do so (now or later) by using -c with the switch command. Example: + + git switch -c + + Or undo this operation with: + + git switch - + + Turn off this advice by setting config variable advice.detachedHead to false + + HEAD is now at 3b79536 Update default base model path + + + +.. parsed-literal:: + + Fetching 10 files: 0%| | 0/10 [00:00= 64: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if dim % default_overall_up_factor != 0: + + +Compiling models +---------------- + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import openvino as ov + + from notebook_utils import device_widget + + + core = ov.Core() + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +``get_compiled_pipeline`` and ``get_compiled_automasker`` functions +defined in ``ov_catvton_helper.py`` provide a convenient way of getting +the pipeline and the ``automasker`` with compiled ov-models that are +compatible with the original interface. They accept the original pipeline +and ``automasker``, inference device and directories with converted +models as arguments. Under the hood we create callable wrapper classes +for compiled models to allow interaction with original pipelines. Note +that all wrapper classes return ``torch.Tensor``\ s instead of +``np.array``\ s. The wrapper instances are then inserted into the pipeline. + +.. code:: ipython3 + + from ov_catvton_helper import get_compiled_pipeline, get_compiled_automasker + + + pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) + automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) + +Interactive demo +---------------- + + + +Please select below whether you would like to use the quantized models +to launch the interactive demo. + +.. code:: ipython3 + + from gradio_helper import make_demo + + + output_dir = "output" + demo = make_demo(pipeline, mask_processor, automasker, output_dir) + try: + demo.launch(debug=False) + except Exception: + demo.launch(debug=False, share=True) + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`.
+ + + + + + + diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 2e5a45826eaceb..fd572a83ffb834 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -112,14 +112,7 @@ tokenizer and preparing the images. .. code:: ipython3 - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "openvino>=2023.1.0" "transformers[torch]>=4.30" "datasets" "nncf>=2.6.0" "torch>=2.1" Pillow - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "matplotlib>=3.4" "openvino>=2023.1.0" "transformers[torch]>=4.30" "datasets" "nncf>=2.6.0" "torch>=2.1" Pillow import requests diff --git a/docs/notebooks/controlnet-stable-diffusion-with-output.rst b/docs/notebooks/controlnet-stable-diffusion-with-output.rst index a922fe445a7897..3ab43d897ea658 100644 --- a/docs/notebooks/controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/controlnet-stable-diffusion-with-output.rst @@ -198,7 +198,7 @@ Prerequisites .. 
code:: ipython3 %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" - %pip install -q "diffusers>=0.14.0" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "diffusers>=0.14.0" "matplotlib>=3.4" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" "datasets>=2.14.6" "nncf>=2.7.0" import requests diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index e5a66d4f74ae17..2baaf0043e7f04 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -39,15 +39,12 @@ Guide =2024.0.0" "requests" "tqdm" "transformers>=4.31" "onnx<1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" + "openvino>=2024.4.0" "requests" "tqdm" "transformers>=4.31" "onnx!=1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" .. parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) - Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -166,7 +163,7 @@ NLP model from Hugging Face and export it in ONNX format: .. 
code:: ipython3 from transformers import AutoModelForSequenceClassification, AutoTokenizer - from transformers.onnx import export, FeaturesManager + import torch ONNX_NLP_MODEL_PATH = MODEL_DIRECTORY_PATH / "distilbert.onnx" @@ -175,37 +172,27 @@ NLP model from Hugging Face and export it in ONNX format: # initialize tokenizer tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") - # get model onnx config function for output feature format sequence-classification - model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(hf_model, feature="sequence-classification") - # fill onnx config based on pytorch model config - onnx_config = model_onnx_config(hf_model.config) - - # export to onnx format - export( - preprocessor=tokenizer, - model=hf_model, - config=onnx_config, - opset=onnx_config.default_onnx_opset, - output=ONNX_NLP_MODEL_PATH, - ) + if not ONNX_NLP_MODEL_PATH.exists(): + inputs = tokenizer("Hi, how are you?", return_tensors="pt") + input_names = list(inputs.keys()) + dynamic_axes = {input_name: {0: "batch_size", 1: "seq_length"} for input_name in input_names} + torch.onnx.export( + hf_model, args=dict(inputs), input_names=input_names, output_names=["logits"], dynamic_axes=dynamic_axes, f=ONNX_NLP_MODEL_PATH, opset_version=14 + ) + print(f"ONNX model exported to {ONNX_NLP_MODEL_PATH}") .. parsed-literal:: - 2024-10-07 23:35:08.862114: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:35:08.907564: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:48:30.842642: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:48:30.876775: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:35:09.444317: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mask, torch.tensor(torch.finfo(scores.dtype).min) - - + 2024-11-04 22:48:31.539454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: - (['input_ids', 'attention_mask'], ['logits']) - + ONNX model exported to model/distilbert.onnx Fetch @@ -673,7 +660,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-10-07 23:35:26.468759: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. 
+ 2024-11-04 22:48:47.716205: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... @@ -724,28 +711,12 @@ Resnet50 model that was exported to the ONNX format: prep.input("input.1").model().set_layout(ov.Layout("nchw")) ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") - - -.. parsed-literal:: - - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - +.. code:: python -.. parsed-literal:: - - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + # Legacy Model Optimizer API + from openvino.tools import mo + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") Changing Model Layout ^^^^^^^^^^^^^^^^^^^^^ @@ -772,26 +743,17 @@ and the layout of an original model: prep.input("input.1").model().set_layout(ov.Layout("nchw")) ov_model = prep.build() -.. 
code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") - - # alternatively use source_layout and target_layout parameters - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") +Legacy Model Optimizer API +========================== +.. code:: python -.. parsed-literal:: + from openvino.tools import mo - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") + # alternatively use source_layout and target_layout parameters + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") Specifying Mean and Scale Values ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -819,25 +781,18 @@ more examples. ov_model = prep.build() -.. 
code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - - ov_model = mo.convert_model( - ONNX_CV_MODEL_PATH, - mean_values=[255 * x for x in [0.485, 0.456, 0.406]], - scale_values=[255 * x for x in [0.229, 0.224, 0.225]], - ) +.. code:: python + # Legacy Model Optimizer API -.. parsed-literal:: + from openvino.tools import mo - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + ov_model = mo.convert_model( + ONNX_CV_MODEL_PATH, + mean_values=[255 * x for x in [0.485, 0.456, 0.406]], + scale_values=[255 * x for x in [0.229, 0.224, 0.225]], + ) Reversing Input Channels ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -862,20 +817,12 @@ the color channels before inference. prep.input("input.1").preprocess().reverse_channels() ov_model = prep.build() -.. code:: ipython3 - - # Legacy Model Optimizer API - from openvino.tools import mo - - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) - - -.. parsed-literal:: +.. code:: python - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
- Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + # Legacy Model Optimizer API + from openvino.tools import mo + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) Cutting Off Parts of a Model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 99bc625a13c257..6e1c039f7013c6 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.4793865978717804 + Predicted Probability: 0.4661690592765808 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/cross-lingual-books-alignment-with-output.rst b/docs/notebooks/cross-lingual-books-alignment-with-output.rst index 87fad52e92709a..b116f0e1f5cda1 100644 --- a/docs/notebooks/cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/cross-lingual-books-alignment-with-output.rst @@ -69,14 +69,7 @@ Guide =3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers "torch>=2.1" "openvino>=2023.1.0" seaborn ipywidgets + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers "torch>=2.1" "openvino>=2023.1.0" seaborn ipywidgets "matplotlib>=3.4" Get Books --------- diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index f60ccb2fadd9e6..30778bafc8e884 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -98,20 +98,12 @@ 
Guide =2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" "opencv-python" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" "opencv-python" "matplotlib>=3.4" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -162,10 +154,10 @@ Imports .. parsed-literal:: - 2024-10-07 23:35:52.753512: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:35:52.788105: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:49:10.827255: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 22:49:10.861330: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-07 23:35:53.378916: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:49:11.454332: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -231,7 +223,7 @@ notebook `__. .. parsed-literal:: - /tmp/ipykernel_72009/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_503635/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -452,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -534,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 249913 / 262144 (95.3%) - Greatest absolute difference: 4.628173828125 at index (0, 0, 430, 337) (up to 1e-05 allowed) - Greatest relative difference: 31968.152067381572 at index (0, 0, 102, 269) (up to 1e-05 allowed) + Mismatched elements: 245783 / 262144 (93.8%) + Greatest absolute difference: 3.1180567741394043 at index (0, 0, 474, 435) (up to 1e-05 allowed) + Greatest relative difference: 16087.83647354372 at index (0, 0, 37, 224) (up to 1e-05 allowed) _check_trace( @@ -671,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.78 ms + [ INFO ] Read model took 8.85 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -685,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 236.15 ms + [ INFO ] Compile model took 253.47 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -722,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 48.84 ms + [ INFO ] First inference took 56.51 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 429 iterations - [ INFO ] Duration: 15015.79 ms + [ INFO ] Count: 406 iterations + [ INFO ] Duration: 15019.48 ms [ INFO ] Latency: - [ INFO ] Median: 34.71 ms - [ INFO ] Average: 34.77 ms - [ INFO ] Min: 34.38 ms - [ INFO ] Max: 37.16 ms - [ INFO ] Throughput: 28.57 FPS + [ INFO ] Median: 35.01 ms + [ INFO ] Average: 36.77 ms + [ INFO ] Min: 34.63 ms + [ INFO ] Max: 48.05 ms + [ INFO ] Throughput: 27.03 FPS .. code:: ipython3 @@ -758,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.92 ms + [ INFO ] Read model took 10.78 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -772,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 239.20 ms + [ INFO ] Compile model took 250.08 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -809,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 29.32 ms + [ INFO ] First inference took 29.09 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 883 iterations - [ INFO ] Duration: 15004.05 ms + [ INFO ] Count: 938 iterations + [ INFO ] Duration: 15008.12 ms [ INFO ] Latency: - [ INFO ] Median: 15.57 ms - [ INFO ] Average: 16.79 ms - [ INFO ] Min: 15.15 ms - [ INFO ] Max: 22.01 ms - [ INFO ] Throughput: 58.85 FPS + [ INFO ] Median: 15.77 ms + [ INFO ] Average: 15.80 ms + [ INFO ] Min: 15.47 ms + [ INFO ] Max: 17.13 ms + [ INFO ] Throughput: 62.50 FPS Visually Compare Inference Results @@ -913,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1728337035 + Visualizing results with seed 1730757034 @@ -997,7 +989,7 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: Loaded model to AUTO in 0.15 seconds. 
- Total time for 68 frames: 2.32 seconds, fps:29.73 + Total time for 68 frames: 2.36 seconds, fps:29.25 References @@ -1010,7 +1002,7 @@ Repository `__ - `Neural Network Compression Framework for fast model inference `__ - `OpenVINO API Tutorial `__ - `OpenVINO PyPI (pip -install openvino-dev) `__ +install openvino) `__ **Kits19 Data** - `Kits19 Challenge Homepage `__ - `Kits19 GitHub diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index 03b5eb1e3fd9f0..5aa37909b71cf7 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1aac55db34be1df744fd19868762a8b4572a8e19683af72a57d7176b1486af0c -size 380239 +oid sha256:894600de56af211d4cc3e64ee092b5a62d1b0158c51048d17accadddea0f046e +size 382725 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index ccd6216d26268d..409d2495e2fea6 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -25,9 +25,8 @@ In this tutorial we consider how to convert and run DDColor using OpenVINO. Additionally, we will demonstrate how to optimize this model using `NNCF `__. -🪄 Let’s start to explore magic of image colorization! - -**Table of contents:** +🪄 Let’s start to explore magic of image colorization! #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Load PyTorch model <#load-pytorch-model>`__ @@ -68,7 +67,7 @@ Prerequisites .. 
code:: ipython3 import platform - + %pip install -q "nncf>=2.11.0" "torch>=2.1" "torchvision" "timm" "opencv_python" "pillow" "PyYAML" "scipy" "scikit-image" "datasets" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -Uq "openvino>=2024.3.0" if platform.python_version_tuple()[1] in ["8", "9"]: @@ -89,14 +88,14 @@ Prerequisites import sys from pathlib import Path import requests - + repo_dir = Path("DDColor") - + if not repo_dir.exists(): !git clone https://github.com/piddnad/DDColor.git - + sys.path.append(str(repo_dir)) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -106,12 +105,12 @@ Prerequisites .. parsed-literal:: Cloning into 'DDColor'... - remote: Enumerating objects: 233, done. - remote: Counting objects: 100% (76/76), done. - remote: Compressing objects: 100% (42/42), done. - remote: Total 233 (delta 54), reused 34 (delta 34), pack-reused 157 (from 1) - Receiving objects: 100% (233/233), 13.34 MiB | 17.27 MiB/s, done. - Resolving deltas: 100% (80/80), done. + remote: Enumerating objects: 241, done. + remote: Counting objects: 100% (84/84), done. + remote: Compressing objects: 100% (49/49), done. + remote: Total 241 (delta 57), reused 37 (delta 35), pack-reused 157 (from 1) + Receiving objects: 100% (241/241), 14.10 MiB | 21.95 MiB/s, done. + Resolving deltas: 100% (83/83), done. @@ -129,6 +128,13 @@ Prerequisites except Exception: from inference.colorization_pipeline_hf import DDColorHF, ImageColorizationPipelineHF + +.. 
parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + Load PyTorch model ------------------ @@ -143,14 +149,14 @@ models from DDColor family. .. code:: ipython3 import torch - + model_name = "ddcolor_paper_tiny" - + ddcolor_model = DDColorHF.from_pretrained(f"piddnad/{model_name}") - - + + colorizer = ImageColorizationPipelineHF(model=ddcolor_model, input_size=512) - + ddcolor_model.to("cpu") colorizer.device = torch.device("cpu") @@ -163,12 +169,12 @@ Run PyTorch model inference import cv2 import PIL - + IMG_PATH = "DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg" - - + + img = cv2.imread(IMG_PATH) - + PIL.Image.fromarray(img[:, :, ::-1]) @@ -207,9 +213,9 @@ loading on device using ``core.complie_model``. import openvino as ov import torch - + OV_COLORIZER_PATH = Path("ddcolor.xml") - + if not OV_COLORIZER_PATH.exists(): ov_model = ov.convert_model(ddcolor_model, example_input=torch.ones((1, 3, 512, 512)), input=[1, 3, 512, 512]) ov.save_model(ov_model, OV_COLORIZER_PATH) @@ -224,11 +230,11 @@ Select one of supported devices for inference using dropdown list. .. code:: ipython3 from notebook_utils import device_widget - + core = ov.Core() - + device = device_widget() - + device @@ -250,36 +256,36 @@ Select one of supported devices for inference using dropdown list. 
import numpy as np import torch import torch.nn.functional as F - - + + def process(img, compiled_model): # Preprocess input image height, width = img.shape[:2] - + # Normalize to [0, 1] range img = (img / 255.0).astype(np.float32) orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] # (h, w, 1) - + # Resize rgb image -> lab -> get grey -> rgb img = cv2.resize(img, (512, 512)) img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + # Transpose HWC -> CHW and add batch dimension tensor_gray_rgb = torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0) - + # Run model inference output_ab = compiled_model(tensor_gray_rgb)[0] - + # Postprocess result # resize ab -> concat original l -> rgb output_ab_resize = F.interpolate(torch.from_numpy(output_ab), size=(height, width))[0].float().numpy().transpose(1, 2, 0) output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1) output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR) - + output_img = (output_bgr * 255.0).round().astype(np.uint8) - + return output_img .. code:: ipython3 @@ -318,7 +324,7 @@ improve model inference speed. .. code:: ipython3 from notebook_utils import quantization_widget - + to_quantize = quantization_widget() to_quantize @@ -334,15 +340,15 @@ improve model inference speed. .. code:: ipython3 import requests - + OV_INT8_COLORIZER_PATH = Path("ddcolor_int8.xml") compiled_int8_model = None - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Collect quantization dataset @@ -357,12 +363,12 @@ dataset from Hugging Face as calibration data. .. 
code:: ipython3 %%skip not $to_quantize.value - + from datasets import load_dataset - + subset_size = 300 calibration_data = [] - + if not OV_INT8_COLORIZER_PATH.exists(): dataset = load_dataset("ummagumm-a/colorization_dataset", split="train", streaming=True).shuffle(seed=42).take(subset_size) for idx, batch in enumerate(dataset): @@ -374,7 +380,7 @@ dataset from Hugging Face as calibration data. img_l = cv2.cvtColor(np.stack([img, img, img], axis=2), cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + image = np.expand_dims(img_gray_rgb.transpose((2, 0, 1)).astype(np.float32), axis=0) calibration_data.append(image) @@ -386,9 +392,9 @@ Perform model quantization .. code:: ipython3 %%skip not $to_quantize.value - + import nncf - + if not OV_INT8_COLORIZER_PATH.exists(): ov_model = core.read_model(OV_COLORIZER_PATH) quantized_model = nncf.quantize( @@ -406,10 +412,10 @@ Perform model quantization .. parsed-literal:: - 2024-10-07 23:39:33.824396: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:39:33.863560: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 22:52:53.152561: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-04 22:52:53.191342: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:39:34.271973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 22:52:53.595160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -446,7 +452,7 @@ Run INT8 model inference .. code:: ipython3 from IPython.display import display - + if OV_INT8_COLORIZER_PATH.exists(): compiled_int8_model = core.compile_model(OV_INT8_COLORIZER_PATH, device.value) img = cv2.imread("DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg") @@ -466,9 +472,9 @@ Compare FP16 and INT8 model size .. code:: ipython3 fp16_ir_model_size = OV_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} MB") - + if OV_INT8_COLORIZER_PATH.exists(): quantized_model_size = OV_INT8_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 print(f"INT8 model size: {quantized_model_size:.2f} MB") @@ -507,17 +513,17 @@ Tool =2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" + %pip install -q "openvino>=2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" "matplotlib>=3.4" %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu @@ -240,7 +238,7 @@ is preprocessed image height, ``W`` is preprocessed image width. xFormers not available xFormers not available - /tmp/ipykernel_74875/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_506168/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(model_path, map_location="cpu")) @@ -272,7 +270,7 @@ is preprocessed image height, ``W`` is preprocessed image width. .. parsed-literal:: - + @@ -306,13 +304,13 @@ loading on device using ``core.complie_model``. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -404,7 +402,7 @@ range. .. parsed-literal:: - + @@ -626,7 +624,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 14.01 seconds. Total FPS (including video processing): 4.28.Inference FPS: 9.46 + Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.68 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -653,7 +651,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -786,10 +784,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-07 23:47:57.736195: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:47:57.768920: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 23:01:18.047102: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:01:18.080343: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 23:47:58.341833: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 23:01:18.654050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -915,10 +913,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.89 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.78 + Processed 60 frames in 12.60 seconds. Total FPS (including video processing): 4.76.Inference FPS: 13.12 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -998,8 +996,8 @@ Tool =2023.3.0" "datasets>=2.14.6" "nncf" "tqdm" - %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" + %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" "matplotlib>=3.4" %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu if platform.python_version_tuple()[1] in ["8", "9"]: @@ -90,9 +90,9 @@ Prerequisites remote: Counting objects: 100% 
(161/161), done. remote: Compressing objects: 100% (120/120), done. remote: Total 441 (delta 115), reused 44 (delta 41), pack-reused 280 (from 1) - Receiving objects: 100% (441/441), 237.90 MiB | 23.22 MiB/s, done. + Receiving objects: 100% (441/441), 237.90 MiB | 24.22 MiB/s, done. Resolving deltas: 100% (158/158), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. WARNING: typer 0.12.5 does not provide the extra 'all' @@ -284,13 +284,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -573,7 +573,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.58 seconds. Total FPS (including video processing): 4.42.Inference FPS: 10.20 + Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.62 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -600,7 +600,7 @@ Run inference on video .. 
parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -733,10 +733,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-10-07 23:57:01.421550: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 23:57:01.453134: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-04 23:10:13.897258: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:10:13.929954: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-07 23:57:02.034824: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-04 23:10:14.502746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -862,10 +862,10 @@ data. .. parsed-literal:: - Processed 60 frames in 13.06 seconds. Total FPS (including video processing): 4.59.Inference FPS: 12.23 + Processed 60 frames in 12.75 seconds. Total FPS (including video processing): 4.70.Inference FPS: 12.76 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -945,9 +945,9 @@ Tool =2023.1.0" - + import os import requests + from pathlib import Path + import platform + + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + with open("notebook_utils.py", "w") as f: + f.write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + if not Path("pip_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", + ) + open("pip_helper.py", "w").write(r.text) + + from pip_helper import pip_install + + if platform.system() 
== "Darwin": + pip_install("numpy<2.0.0") + pip_install("torch", "torchvision", "opencv-python", "wheel", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install("git+https://github.com/facebookresearch/detectron2.git", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install("openvino>=2023.1.0") .. parsed-literal:: - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. + Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.44.0) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu + Collecting git+https://github.com/facebookresearch/detectron2.git + Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-9ds1xx43 +.. 
parsed-literal:: + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-9ds1xx43 -.. parsed-literal:: - 24692 +.. parsed-literal:: + Resolved https://github.com/facebookresearch/detectron2.git to commit 8d85329aed8506ea3672e3e208971345973ea761 + Preparing metadata (setup.py): started + Preparing metadata (setup.py): finished with status 'done' + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) + Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) + Collecting hydra-core>=1.1 (from detectron2==0.6) + Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) + Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) + Using cached https://download.pytorch.org/whl/iopath-0.1.9-py3-none-any.whl (27 kB) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Collecting omegaconf<2.4,>=2.1 (from detectron2==0.6) + Using cached omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB) + Requirement already 
satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) + Requirement already satisfied: pycocotools>=2.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.6) + Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (2.10.1) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) + Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: 
pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.0.2) + Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.1) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: 
tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.1.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) + Using cached omegaconf-2.3.0-py3-none-any.whl (79 kB) + Building wheels for collected packages: detectron2 + Building wheel for detectron2 (setup.py): started + Building wheel for detectron2 (setup.py): finished with status 'done' + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313237 sha256=7cd84a15a89de76a7ab5b648f2fb7ebff63b7e43ffc90c7f19a568d16858de8a + Stored in directory: /tmp/pip-ephem-wheel-cache-uvptv5zg/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Successfully built detectron2 + Installing collected packages: omegaconf, iopath, hydra-core, detectron2 + Attempting uninstall: omegaconf + Found existing installation: omegaconf 2.4.0.dev3 + Uninstalling omegaconf-2.4.0.dev3: + Successfully uninstalled omegaconf-2.4.0.dev3 + Attempting uninstall: iopath + Found existing installation: iopath 0.1.10 + Uninstalling iopath-0.1.10: + Successfully uninstalled iopath-0.1.10 + Successfully installed detectron2-0.6 hydra-core-1.3.2 iopath-0.1.9 omegaconf-2.3.0 + Requirement already satisfied: openvino>=2023.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) Define helpers for PyTorch model initialization and conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index ffb8e4a0030770..f5b1d98eea3213 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2341ffe8acbda0ad14e43fca01d72733855b5bde3b29601f9bbeaa4d6ff41207 -size 58357 +oid sha256:0df4e94924f81aab66086702d85a461f463078f0d06f67b1fe5d46ad8480aa91 +size 58652 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 561c47897650fb..f676b44edd1d9a 100644 --- 
a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce0e6f8e05d4a4e52b304aa95e729a9fac0def06f80f61feacf0405f95dbb31f -size 509296 +oid sha256:b5a857cd060d740290ccc65aec47252aad9f41c665dc2808195c3185248977e8 +size 509376 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index e8c8278cf4c90d..67719cdcbd66b0 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47fc91d79709effd086bc71f8586af4bc47ce40a460e1d886a10fb3abf0ce2d8 -size 56091 +oid sha256:ddc40900fddf1a115903c4e200899306060114348bf2ca82fbb4d7d92a885b09 +size 53897 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index cf32ec81286190..af63ef41697b47 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afd0387776f050660082e8adde8af8958eaf586f20e55da75b392f53362487ca -size 459024 +oid sha256:d1276209027e5aac72e4bb6f39f4494d2a807ee4bd85054a1285b0832e4515b9 +size 460797 diff --git a/docs/notebooks/distil-whisper-asr-with-output.rst b/docs/notebooks/distil-whisper-asr-with-output.rst index 
fdd81327b5675a..53950226a210c7 100644 --- a/docs/notebooks/distil-whisper-asr-with-output.rst +++ b/docs/notebooks/distil-whisper-asr-with-output.rst @@ -85,9 +85,9 @@ Prerequisites .. code:: ipython3 - %pip install -q "transformers>=4.35" "torch>=2.1,<2.4.0" "torchvision<0.19.0" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.35" "torch>=2.4.1" "onnx!=1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.0" "librosa" "soundfile" + %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.19" "librosa" "soundfile" %pip install -q "nncf>=2.6.0" "jiwer" import requests @@ -133,7 +133,8 @@ using tokenizer. "distil-whisper/distil-small.en", ], "Whisper": [ - "openai/whisper-large-v3-turbo" "openai/whisper-large-v3", + "openai/whisper-large-v3-turbo", + "openai/whisper-large-v3", "openai/whisper-large-v2", "openai/whisper-large", "openai/whisper-medium", diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst index 463a8051cf4d8b..862079f68aeeb7 100644 --- a/docs/notebooks/distilbert-sequence-classification-with-output.rst +++ b/docs/notebooks/distilbert-sequence-classification-with-output.rst @@ -47,31 +47,31 @@ Imports .. 
parsed-literal:: Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.45.2) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.25.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) - Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.0) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages 
(from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Requirement already satisfied: openvino>=2023.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.46.1) + Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.6) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.2) + Requirement already 
satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) Note: you may need to restart the kernel to use updated packages. @@ -110,6 +110,15 @@ model from Hugging Face. 
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english" model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=checkpoint) + +.. parsed-literal:: + + 2024-11-04 23:18:47.102633: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:18:47.135966: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-04 23:18:47.793551: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + Initializing the Tokenizer -------------------------- @@ -166,10 +175,15 @@ optimal execution on end-point target devices. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mask, torch.tensor(torch.finfo(scores.dtype).min) + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. OpenVINO™ Runtime uses the `Infer @@ -274,7 +288,7 @@ For a single input sentence .. parsed-literal:: Label: POSITIVE - Total Time: 0.02 seconds + Total Time: 0.03 seconds Read from a text file diff --git a/docs/notebooks/dolly-2-instruction-following-with-output.rst b/docs/notebooks/dolly-2-instruction-following-with-output.rst index 01d4b8fed8bb57..3b276a1725f0aa 100644 --- a/docs/notebooks/dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/dolly-2-instruction-following-with-output.rst @@ -136,6 +136,8 @@ documentation `__. .. code:: ipython3 import os + from pathlib import Path + import requests os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" @@ -144,12 +146,17 @@ documentation `__. 
%pip install -q "diffusers>=0.16.1" "transformers>=4.33.0" "torch>=2.1" "nncf>=2.10.0" "onnx<1.16.2" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - import requests - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) Convert model using Optimum-CLI tool ------------------------------------ @@ -214,10 +221,10 @@ you can add ``--sym``. For INT4 quantization you can also specify the following arguments : - The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. + quantization; -1 will result in per-column quantization. - The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. @@ -227,7 +234,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 - from IPython.display import Markdown, display + from IPython.display import display import ipywidgets as widgets prepare_int4_model = widgets.Checkbox( @@ -272,6 +279,7 @@ sacrifice of the model size and inference latency. .. 
code:: ipython3 from pathlib import Path + from cmd_helper import optimum_cli model_id = "databricks/dolly-v2-3b" model_path = Path("dolly-v2-3b") @@ -284,36 +292,19 @@ sacrifice of the model size and inference latency. def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return - fp16_model_dir.mkdir(parents=True, exist_ok=True) - export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format fp16".format(model_id) - export_command = export_command_base + " " + str(fp16_model_dir) - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - ! $export_command + optimum_cli(model_id, fp16_model_dir, additional_args={"weight-format": "fp16"}) def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return - int8_model_dir.mkdir(parents=True, exist_ok=True) - export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int8".format(model_id) - export_command = export_command_base + " " + str(int8_model_dir) - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - ! $export_command + optimum_cli(model_id, int8_model_dir, additional_args={"weight-format": "int8"}) def convert_to_int4(): if (int4_model_dir / "openvino_model.xml").exists(): return - int4_model_dir.mkdir(parents=True, exist_ok=True) - export_command_base = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4 --ratio 1.0 --group-size 128".format( - model_id - ) - export_command = export_command_base + " " + str(int4_model_dir) - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - ! 
$export_command + optimum_cli(model_id, int4_model_dir, additional_args={"weight-format": "int4"}) if prepare_fp16_model.value: diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 194282459f56cb..992c346194e31c 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -189,10 +189,10 @@ Prerequisites remote: Counting objects: 100% (153/153), done. remote: Compressing objects: 100% (99/99), done. remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) - Receiving objects: 100% (335/335), 72.41 MiB | 19.06 MiB/s, done. + Receiving objects: 100% (335/335), 72.41 MiB | 20.85 MiB/s, done. Resolving deltas: 100% (123/123), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images @@ -282,7 +282,7 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1204: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. 
You only need to pass a destination folder as`local_dir`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( @@ -296,6 +296,20 @@ We will use model for 256x256 resolution as example. Also, models for .. parsed-literal:: AE working on z of shape (1, 4, 32, 32) = 4096 dimensions. + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + 2024-11-04 23:23:38.980054: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-04 23:23:39.013901: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-04 23:23:39.616188: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. 
parsed-literal:: + >>> model checkpoint loaded. @@ -374,6 +388,17 @@ Convert CLIP text encoder del cond_stage_model gc.collect(); + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + Convert CLIP image encoder ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -399,43 +424,43 @@ resolutions. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. std = torch.as_tensor(std, device=data.device, dtype=data.dtype) @@ -464,7 +489,7 @@ Convert AE encoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! w_ = w_ * (int(c)**(-0.5)) @@ -508,15 +533,15 @@ Convert Diffusion U-Net model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if l_context == 77 + t*16: ## !!! 
HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! assert x.shape[1] == self.channels @@ -903,14 +928,14 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_79693/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_511478/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-08 00:11:25 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 194.23 seconds + start: man fishing in a boat at sunset 2024-11-04 23:26:56 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 206.55 seconds .. code:: ipython3 @@ -1160,14 +1185,6 @@ quantization time. INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -.. parsed-literal:: - - 2024-10-08 00:40:44.424263: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-08 00:40:44.462873: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 00:40:45.077046: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: @@ -1347,8 +1364,8 @@ Let’s run the optimized pipeline .. parsed-literal:: - start: man fishing in a boat at sunset 2024-10-08 01:40:46 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.42 seconds + start: man fishing in a boat at sunset 2024-11-05 00:58:08 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 97.78 seconds .. code:: ipython3 @@ -1453,9 +1470,9 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 193.245 - INT8 latency: 97.168 - Performance speed up: 1.989 + FP32 latency: 201.526 + INT8 latency: 96.036 + Performance speed up: 2.098 Interactive inference diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index e9c5d2f07afec7..b50b82341f4af8 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -82,20 +82,12 @@ Prerequisites .. code:: ipython3 - import platform - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" "matplotlib>=3.4" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. 
- Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -116,9 +108,9 @@ Prerequisites remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 22.33 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 23.37 MiB/s, done. Resolving deltas: 100% (246/246), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM .. code:: ipython3 @@ -385,23 +377,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if output_w > 0 and output_h > 0: @@ -648,10 +640,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-10-08 01:57:55.723142: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 01:57:55.754489: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:15:40.935673: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:15:40.968460: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 01:57:56.401127: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:15:41.606156: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -818,7 +810,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.82 ms + [ INFO ] Read model took 30.24 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] 
@@ -838,7 +830,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1394.30 ms + [ INFO ] Compile model took 1388.43 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -879,17 +871,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 815.67 ms + [ INFO ] First inference took 798.46 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 54 iterations - [ INFO ] Duration: 16885.27 ms + [ INFO ] Count: 49 iterations + [ INFO ] Duration: 16827.30 ms [ INFO ] Latency: - [ INFO ] Median: 1856.65 ms - [ INFO ] Average: 1850.85 ms - [ INFO ] Min: 1459.90 ms - [ INFO ] Max: 2009.04 ms - [ INFO ] Throughput: 3.20 FPS + [ INFO ] Median: 2025.54 ms + [ INFO ] Average: 1991.09 ms + [ INFO ] Min: 816.09 ms + [ INFO ] Max: 2176.67 ms + [ INFO ] Throughput: 2.91 FPS .. code:: ipython3 @@ -915,7 +907,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 44.15 ms + [ INFO ] Read model took 43.95 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -935,7 +927,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] 
[ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1618.00 ms + [ INFO ] Compile model took 1607.96 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -976,17 +968,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). - [ INFO ] First inference took 587.18 ms + [ INFO ] First inference took 596.94 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 58 iterations - [ INFO ] Duration: 16436.53 ms + [ INFO ] Count: 55 iterations + [ INFO ] Duration: 15959.69 ms [ INFO ] Latency: - [ INFO ] Median: 1670.45 ms - [ INFO ] Average: 1680.37 ms - [ INFO ] Min: 1321.28 ms - [ INFO ] Max: 2532.97 ms - [ INFO ] Throughput: 3.53 FPS + [ INFO ] Median: 1701.74 ms + [ INFO ] Average: 1692.86 ms + [ INFO ] Min: 653.76 ms + [ INFO ] Max: 1817.85 ms + [ INFO ] Throughput: 3.45 FPS Interactive segmentation demo @@ -1316,7 +1308,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index 8854bf68943a42..9f65fa9db4554a 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0ed908091274e314601740b2be6f08a06b74532f09d98e99703a91f1155ccd4 -size 1260810 +oid sha256:9368b1fbd458d1e022a768f24e689af0fd6e5dacc98a920f45d3fc0f63062567 +size 1259373 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index 45da467e43595b..7c0716600906a1 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:444eaadeac9bde960bc08fc6eed2ca8c5d5de854782d9ea322535ec1e35a38b0 -size 1261402 +oid sha256:22f0e5bfd74e7426218d2bd007f9219433556530ddb10f33b9706398eb7cd370 +size 1263404 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index 91cbceb5a9bc44..0a717e2c9aa38d 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90b937884a9df8f888beb020e5a56dba5ee941f780ac61dd0fda8502909038d2 -size 1261809 +oid sha256:d1863ccc9483f6cbd60768b311d104ee68692c3a7181e06da4bc751b52cf0ca1 +size 1262535 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst 
b/docs/notebooks/encodec-audio-compression-with-output.rst index 5036c6f32a2259..7f0e153ffa4a55 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -142,7 +142,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +302,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/explainable-ai-1-basic-with-output.rst b/docs/notebooks/explainable-ai-1-basic-with-output.rst index d04827023dc979..1df31312fd752f 100644 --- a/docs/notebooks/explainable-ai-1-basic-with-output.rst +++ b/docs/notebooks/explainable-ai-1-basic-with-output.rst @@ -72,7 +72,9 @@ Guide =2024.2.0" opencv-python tqdm # Install openvino xai package - %pip install -q --no-deps "openvino-xai>=1.0.0" + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -128,12 +130,6 @@ Download the Model and data samples else: print(f"{model_name} already downloaded to {base_artifacts_dir}") - -.. parsed-literal:: - - v3-small_224_1.0_float already downloaded to artifacts - - Select inference device ----------------------- @@ -146,15 +142,6 @@ select device from dropdown list for running inference using OpenVINO device = device_widget() device - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Load the Model -------------- @@ -187,7 +174,7 @@ Load an Image # Reshape to model input shape. input_image = np.expand_dims(input_image, 0) - plt.imshow(image); + plt.imshow(image) .. 
parsed-literal:: @@ -196,7 +183,15 @@ Load an Image -.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png + +.. parsed-literal:: + + + + + + +.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png Do Inference @@ -218,12 +213,6 @@ Do Inference imagenet_classes = imagenet_filename.read_text().splitlines() - -.. parsed-literal:: - - 'data/imagenet_2012.txt' already exists. - - .. code:: ipython3 # The model description states that for this model, class 0 is a background. @@ -276,23 +265,22 @@ saliency_map}). For classification, targets are indices of the classes. explanation = explainer( data=input_image, targets=result_index, # can be a single target or a container of targets - overlay=True, # saliency map overlay over the input image, defaults to False + label_names=imagenet_classes, # optional, list of label names + overlay=True, # saliency map overlays over the input image, defaults to False ) - plt.imshow(explanation.saliency_map[result_index]) - plt.title(f"Saliency map of the {result_index} class.") - - - - -.. parsed-literal:: - - Text(0.5, 1.0, 'Saliency map of the 206 class.') + explanation.plot() +.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png -.. image:: explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png +Note: by default, overlay is applied over the image in the ``data`` +argument. In this case, ``data`` was preprocessed (e.g. resized to +224x224), but is still recognizable by a human. In order for the overlay to +be applied over the original image, provide the original image with the +``original_input_image`` argument (please refer to `OpenVINO™ Explainable AI +Toolkit (2/3): Deep Dive `__).
Above saliency map can help to answer the question: “Which part of the image mostly contributes to the model predicted class: (206, ‘n02099267 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png deleted file mode 100644 index 156c14c9b4af72..00000000000000 --- a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:800d793a4cc16c26b283c899a8eab37260ca4711929b45c8206fc124aa75ab99 -size 387941 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png new file mode 100644 index 00000000000000..a8fc791b3e4c52 --- /dev/null +++ b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_11_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b1a955ec7a4a7394f905837b1a1686d3bb5130565eb9d4901eade821e6757c +size 387941 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png new file mode 100644 index 00000000000000..c6484cb68eeb33 --- /dev/null +++ b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d8ae99fc5c2d7573e5243b1711dab83c7f4658aa423d067ebd17fd87d336c5 +size 351476 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png b/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png deleted 
file mode 100644 index 00a5b1e39fb4d5..00000000000000 --- a/docs/notebooks/explainable-ai-1-basic-with-output_files/explainable-ai-1-basic-with-output_19_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6207cc5c8c5fbfe4af1d74ef84cbdc03b86e79ed396b6a28bbb1fe2d9a176f7 -size 242785 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst index f96778061cc390..4e2ad0970661d2 100644 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst @@ -57,31 +57,31 @@ predicting a particular label. - `Preprocess image for MobileNet <#preprocess-image-for-mobilenet>`__ -- `Basic usage: Auto mode - explainer <#basic-usage-auto-mode-explainer>`__ +- `Basic usage: Explainer in AUTO + mode <#basic-usage-explainer-in-auto-mode>`__ - - `Create Explainer <#create-explainer>`__ - - `Do explanation <#do-explanation>`__ + - `Create Explainer object <#create-explainer-object>`__ + - `Generate explanation <#generate-explanation>`__ - `Visualize saliency maps <#visualize-saliency-maps>`__ - `Save saliency maps <#save-saliency-maps>`__ - - `Return saliency maps for all - classes <#return-saliency-maps-for-all-classes>`__ + - `Generate saliency maps for all + classes <#generate-saliency-maps-for-all-classes>`__ - `Pre- and post-process functions <#pre--and-post-process-functions>`__ - `Visualization Parameters <#visualization-parameters>`__ -- `White-box explainer <#white-box-explainer>`__ +- `Explainer in WHITEBOX mode <#explainer-in-whitebox-mode>`__ - - `ReciproCAM explain method <#reciprocam-explain-method>`__ + - `ReciproCAM XAI method <#reciprocam-xai-method>`__ - `Insert XAI branch <#insert-xai-branch>`__ - `Insertion-related parameters <#insertion-related-parameters>`__ -- `Black-box explainer <#black-box-explainer>`__ +- `Explainer in BLACKBOX mode <#explainer-in-blackbox-mode>`__ - `Advanced 
<#advanced>`__ - `Import ImageNet label names and add them to saliency maps <#import-imagenet-label-names-and-add-them-to-saliency-maps>`__ - - `Activation map explain method <#activation-map-explain-method>`__ + - `Activation map XAI method <#activation-map-xai-method>`__ Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -110,9 +110,11 @@ Install requirements import platform # Install openvino package - %pip install -q "openvino>=2024.2.0" opencv-python tqdm + %pip install -q "openvino>=2024.2.0" opencv-python tqdm scipy - %pip install -q --no-deps "openvino-xai>=1.0.0" + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -175,11 +177,10 @@ converted to IR model from OpenVINO storage. else: print(f"{model_name} already downloaded to {base_artifacts_dir}") +.. code:: ipython3 -.. parsed-literal:: - - v3-small_224_1.0_float already downloaded to artifacts - + # Create ov.Model + model = ov.Core().read_model(model_xml_path) Load the Image ~~~~~~~~~~~~~~ @@ -196,7 +197,7 @@ Load the Image # The MobileNet model expects images in RGB format. image = cv2.cvtColor(cv2.imread(filename=str(image_filename)), code=cv2.COLOR_BGR2RGB) - plt.imshow(image); + plt.imshow(image) .. parsed-literal:: @@ -205,7 +206,15 @@ Load the Image -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png + +.. parsed-literal:: + + + + + + +.. 
image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png Preprocess image for MobileNet @@ -220,41 +229,41 @@ Preprocess image for MobileNet # Add batch dimension preprocessed_image = np.expand_dims(preprocessed_image, 0) -Basic usage: Auto mode explainer --------------------------------- +Basic usage: ``Explainer`` in ``AUTO`` mode +------------------------------------------- -The easiest way to run the explainer is to do it in Auto mode. Under the -hood of Auto mode, it will first try to run the ``White-Box`` mode. If -this fails, it will then run the ``Black-Box`` mode. See more details -about `White-Box <#white-box-explainer>`__ and -`Black-Box <#black-box-explainer>`__ modes below. +The easiest way to generate saliency maps is to use ``Explainer`` in +``ExplainMode.AUTO`` mode (``AUTO`` mode is used by default). -| Generating saliency maps involves model inference. The explainer will - perform model inference, but to do so, it requires ``preprocess_fn`` - and ``postprocess_fn``. -| At this stage, we can avoid passing ``preprocess_fn`` by preprocessing - the data beforehand (e.g., resizing and adding a batch dimension as - shown above). We also don’t pass ``postprocess_fn`` here for - simplicity, since the White-Box mode doesn’t fail on the example - model. +Under the hood of ``AUTO`` mode, ``Explainer`` will first try to run the +``WHITEBOX`` mode. If ``WHITEBOX`` fails, it will then run the +``BLACKBOX`` mode as a fallback option. See more details about +`WHITEBOX <#explainer-in-whitebox-mode>`__ and +`BLACKBOX <#explainer-in-blackbox-mode>`__ modes below. -To learn more about pre- and post-process functions, refer to the `Pre- +Generating saliency maps involves model inference. The explainer will +perform model inference, but to do so, it requires ``preprocess_fn`` and +``postprocess_fn``. 
We can avoid passing ``preprocess_fn`` by +preprocessing (e.g., resizing and adding a batch dimension as shown +above) the input data beforehand - by default, ``preprocess_fn`` is the +identity function. We expect that current example will successfully use +``WHITEBOX`` mode under the hood, therefore we don’t pass +``postprocess_fn`` (``postprocess_fn`` is not required for ``WHITEBOX`` +mode, only for ``BLACKBOX``). + +To learn more about pre- and post-process functions, refer to the `pre- and post-process functions <#pre--and-post-process-functions>`__ section. -Create Explainer -~~~~~~~~~~~~~~~~ +Create ``Explainer`` object +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - # Create ov.Model - model = ov.Core().read_model(model_xml_path) - - # Create explainer object explainer = xai.Explainer( model=model, task=xai.Task.CLASSIFICATION, @@ -269,27 +278,27 @@ Create Explainer INFO:openvino_xai:Explaining the model in white-box mode. -Do explanation -~~~~~~~~~~~~~~ +Generate ``explanation`` +~~~~~~~~~~~~~~~~~~~~~~~~ -The predicted label for this image is ``flat-coated_retriever`` with -label index ``206``. So here and further we will check saliency maps for -this index. +The predicted class for this model-image pair is +``flat-coated_retriever`` with class index ``206``. So here and further +we will check saliency maps for this index. .. code:: ipython3 - # You can choose classes to generate saliency maps for. - # In this notebook we will check maps for predicted class 206 - flat-coated retriever + # You can choose class(es) to generate saliency maps for. + # In this notebook we will check maps for predicted class with index 206 - "flat-coated retriever" retriever_class_index = 206 .. 
code:: ipython3 explanation = explainer( preprocessed_image, - targets=retriever_class_index, - overlay=True, # False by default + targets=retriever_class_index, # can be a single target or a container of targets + overlay=True, # saliency map overlay over the original image, False by default, set to True for better visual inspection ) Visualize saliency maps @@ -300,16 +309,14 @@ Visualize saliency maps .. code:: ipython3 explanation: Explanation - # Dict[int: np.ndarray] where key - class id, value - processed saliency map e.g. 354x500x3 - explanation.saliency_map + # explanation.saliency_map: Dict[int: np.ndarray] # where key - class id, value - processed saliency map (e.g. 354 x 500 x 3 shape) # Check saved saliency maps print(f"Saliency maps were generated for the following classes: {explanation.targets}") print(f"Saliency map size: {explanation.shape}") - # Show saliency maps for retriever class - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map); + # Visualize generated saliency maps for each target class (.plot() supports plotting multiple saliency maps) + explanation.plot() .. parsed-literal:: @@ -319,7 +326,7 @@ Visualize saliency maps -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png Save saliency maps @@ -330,38 +337,48 @@ Save saliency maps .. code:: ipython3 # Save saliency map - output = base_artifacts_dir / "explain_auto" - explanation.save(output) + explanation.save(base_artifacts_dir, "explain_auto_") .. 
code:: ipython3 - # See saved saliency maps - image_sal_map = cv2.imread(f"{output}/target_{retriever_class_index}.jpg") + # Plot saved saliency map + image_sal_map = cv2.imread(f"{base_artifacts_dir}/explain_auto_{retriever_class_index}.jpg") image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) - plt.imshow(image_sal_map); + plt.imshow(image_sal_map) + + + + +.. parsed-literal:: + -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png -Return saliency maps for all classes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png +Generate saliency maps for all classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +To obtain saliency maps for all classes, set ``targets`` to ``None`` or +``-1``. .. code:: ipython3 explanation = explainer(preprocessed_image, targets=-1) # Check saved saliency maps - print(f"Saliency maps were generated for the following classes: {explanation.targets}") + print(f"Saliency maps were generated for the following classes: {explanation.targets[:5]} ... {explanation.targets[-5:]}") print(f"Saliency map size: {explanation.shape}") .. 
parsed-literal:: - Saliency maps were generated for the following classes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 
406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 
806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000] + Saliency maps were generated for the following classes: [0, 1, 2, 3, 4] ... [996, 997, 998, 999, 1000] Saliency map size: (224, 224, 3) @@ -379,19 +396,19 @@ constructor. By default, ``preprocess_fn`` is an identity function that passes the input without any changes, assuming it is preprocessed beforehand. -In Auto mode, the explainer tries to run the White-Box mode first. If it -fails, the corresponding exception will be raised, and the Black-Box -mode will be enabled as a fallback. +In ``AUTO`` mode, the explainer tries to run the ``WHITEBOX`` mode +first. If it fails, the corresponding exception will be raised, and the +``BLACKBOX`` mode will be enabled as a fallback. -The Black-Box mode requires access to the output ``logits`` (activated -or not). Therefore, in such cases, ``postprocess_fn`` is required, which -accepts the raw IR model output and returns logits (see below for a -reference). +The ``BLACKBOX`` mode requires access to the output ``logits`` +(activated or not). 
Therefore, in such cases, ``postprocess_fn`` is +required, which accepts the raw IR model output and returns ``logits`` +(see below for a reference). .. code:: ipython3 def preprocess_fn(x: np.ndarray) -> np.ndarray: - # Implementing own pre-process function based on model's implementation + # Implementing pre-processing based on model's pipeline x = cv2.resize(src=x, dsize=(224, 224)) # Add batch dimension @@ -400,7 +417,7 @@ reference). def postprocess_fn(x: OVDict): - # Implementing own post-process function based on model's implementation + # Implementing post-processing function based on model's pipeline # Return "logits" model output return x[0] @@ -447,7 +464,7 @@ Visualization Parameters # Create explainer object explainer = xai.Explainer(model=model, task=xai.Task.CLASSIFICATION) - # Return overlayed image + # Generate overlayed saliency_map explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names @@ -455,12 +472,10 @@ Visualization Parameters original_input_image=image, # to apply overlay on the original image instead of preprocessed one that was used for the explainer ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map) + explanation.plot() # Save saliency map - output = base_artifacts_dir / "overlay" - explanation.save(output) + explanation.save(base_artifacts_dir, "overlay_") .. parsed-literal:: @@ -472,33 +487,31 @@ Visualization Parameters -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png .. 
code:: ipython3 - # Return low-resolution saliency map + # Generate saliency map without overlay over original image explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names overlay=False, # False by default ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map) + explanation.plot() # Save saliency map - output = base_artifacts_dir / "colormap" - explanation.save(output) + explanation.save(base_artifacts_dir, "colormap_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png .. code:: ipython3 - # Return low-resolution gray-scale saliency map + # Return low-resolution (raw) gray-scale saliency map explanation = explainer( preprocessed_image, targets=[retriever_class_index], # target can be a single label index, label name or a list of indices/names @@ -506,37 +519,37 @@ Visualization Parameters colormap=False, # True by default ) - retriever_sal_map = explanation.saliency_map[retriever_class_index] - plt.imshow(retriever_sal_map, cmap="gray") + explanation.plot() # Save saliency map - output = base_artifacts_dir / "grayscale" - explanation.save(output) + explanation.save(base_artifacts_dir, "grayscale_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png -White-Box explainer -------------------- +``Explainer`` in ``WHITEBOX`` mode +---------------------------------- -ReciproCAM explain method +``ReciproCAM`` XAI method ~~~~~~~~~~~~~~~~~~~~~~~~~ -The White-Box explainer treats the model as a white box and needs to -make inner modifications. 
It adds extra XAI nodes after the backbone to -estimate which activations are important for model prediction. +``Explainer`` in ``WHITEBOX`` mode treats the model as a white box and +performs its inner modifications. ``Explainer`` inserts extra XAI nodes +after the backbone to estimate which activations are important for model +prediction. If a method is not specified, the XAI branch will be generated using the `ReciproCAM `__ method. By default, the insertion of the XAI branch will be done automatically -by searching for the correct node. +by searching for the correct node - ``target_layer`` (``target_layer`` +can be specified manually). It works quickly and precisely, requiring only one model inference. @@ -547,10 +560,8 @@ It works quickly and precisely, requiring only one model inference. model=model, task=xai.Task.CLASSIFICATION, preprocess_fn=preprocess_fn, - # defaults to ExplainMode.AUTO - explain_mode=ExplainMode.WHITEBOX, - # ReciproCAM is the default XAI method for CNNs - explain_method=xai.Method.RECIPROCAM, + explain_mode=ExplainMode.WHITEBOX, # defaults to ExplainMode.AUTO + explain_method=xai.Method.RECIPROCAM, # ReciproCAM is the default white-box method for CNNs ) @@ -586,7 +597,7 @@ environment. model, task=xai.Task.CLASSIFICATION, explain_method=xai.Method.RECIPROCAM, - target_layer="MobilenetV3/Conv_1/Conv2D", # MobileNet V3 + target_layer="MobilenetV3/Conv_1/Conv2D", # optional, by default insert_xai will try to find target_layer automatically embed_scaling=True, ) @@ -598,23 +609,28 @@ environment. INFO:openvino_xai:Insertion of the XAI branch into the model was successful. +**Note**: ``insert_xai`` supports both OpenVINO IR and PyTorch models. +See documentation for more details. + Insertion-related parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If automatic search for correct node fails, you can set up a correct -node manually with ``target_layer`` argument. For classification it’s +node manually with ``target_layer`` argument. 
For classification, it’s the last backbone node with shape [1, num_channels, feature_map_height, -feature_map_width]. For example, for MobileNetV3 it will be +feature_map_width]. For example, for the used MobileNetV3 it will be ``MobilenetV3/Conv_1/Conv2D`` layer with [1, 576, 7, 7] output shape. To find the right ``target_layer`` for your model, check the name of the -last convolutional layer in the backbone using ``.XML`` model. +last convolutional node in the backbone using ``.XML`` file (optionally, +use some graph visualization tool, such as Netron). ``embed_scaling`` **default True** (for speed purposes), this parameter -adds normalization to the XAI branch, which results in being able to -visualize saliency maps right away without further postprocessing. +ensures that saliency map scaling is embedded into the graph, which +results in being able to visualize saliency maps right away without +further postprocessing. .. code:: ipython3 @@ -638,25 +654,32 @@ visualize saliency maps right away without further postprocessing. INFO:openvino_xai:Explaining the model in white-box mode. -Black-Box explainer -------------------- - +``Explainer`` in ``BLACKBOX`` mode +---------------------------------- -The Black-Box method treats the model as a black box without altering -its structure. Therefore, this method will work on any model that can be -inferred and return class probabilities as output. -The `RISE `__ algorithm used in -Black-Box mode applies random masks to hide parts of the image, -retrieves the resulting class probabilities, and uses this information -to calculate the “importance” of each part of the image for the final -results. After performing thousands of inferences, a summarized saliency -map is generated. +``Explainer`` in ``BLACKBOX`` mode treats the model as a black box +without altering its internal structure. Therefore, this method will +work on any model that can be inferred and return class scores as +output. 
While it is convenient to treat every model as a black box for -explanation purposes, this algorithm may require a large number of -inferences (defaulting to 5000) to generate a high-quality saliency map. +explanation purposes, black-box methods may require a significant number +of inferences (AISE requires 120-500 model inferences). + +Given that the quality of the saliency maps usually correlates with the +number of available inferences, we propose the following presets for the +black-box methods: ``Preset.SPEED``, ``Preset.BALANCE``, +``Preset.QUALITY`` (``Preset.BALANCE`` is used by default). + +AISE (Adaptive Input Sampling for Explanation of Black-box Models) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +AISE is used as a default black-box method. AISE formulates saliency map +generation as a kernel density estimation (KDE) problem, and adaptively +samples input masks using a derivative-free optimizer to maximize mask +saliency score. .. code:: ipython3 @@ -673,45 +696,69 @@ inferences (defaulting to 5000) to generate a high-quality saliency map. explanation = explainer( image, targets=retriever_class_index, - # targets=-1, # Explain all classes overlay=True, - num_masks=1000, # kwargs of the RISE algo ) .. parsed-literal:: INFO:openvino_xai:Explaining the model in black-box mode. - Explaining in synchronous mode: 100%|██████████| 1000/1000 [00:03<00:00, 259.73it/s] .. code:: ipython3 + # Plot saliency map + explanation.plot() + # Save saliency map - output = base_artifacts_dir / "blackbox_explain" - explanation.save(output) + explanation.save(base_artifacts_dir, "blackbox_aise_") + + + +.. 
image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png + + +RISE (Randomized Input Sampling for Explanation of Black-box Models) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`RISE `__ probes a model by +sub-sampling the input image via random masks and records its response +to each of them. RISE creates random masks from down-scaled space +(e.g. 7×7 grid) and adds random translation shifts for the pixel-level +explanation with further up-sampling. A weighted sum of all sampled masks +is used to generate the fine-grained saliency map. + +.. code:: ipython3 + + # Create explainer object + explainer = xai.Explainer( + model=model, + task=xai.Task.CLASSIFICATION, + preprocess_fn=preprocess_fn, + postprocess_fn=postprocess_fn, + explain_mode=ExplainMode.BLACKBOX, # defaults to AUTO + explain_method=xai.Method.RISE, # xai.Method.AISE is used by default + ) - # See saved saliency maps - image_sal_map = cv2.imread(f"{output}/target_{retriever_class_index}.jpg") - image_sal_map = cv2.cvtColor(image_sal_map, cv2.COLOR_BGR2RGB) - plt.imshow(image_sal_map); + # Generate explanation + explanation = explainer( + image, + targets=retriever_class_index, + overlay=True, + ) +.. code:: ipython3 + # Plot saliency map + explanation.plot() + + # Save saliency map + explanation.save(base_artifacts_dir, "blackbox_rise_") -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png -For the ``Black-Box explainer``, the number of masks and cells is -crucial for achieving good results. In the example above, it’s evident -that the number of masks was insufficient to create a high-quality map. +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png -Varying the ``num_cells`` and ``num_masks`` parameters can achieve -different goals: - To speed up the explanation, you can reduce the -number of ``num_masks``. 
However, this will decrease the quality of the -resulting saliency maps, making it suitable for large and focused -objects. - Increasing ``num_cells`` provides a more fine-grained result, -but it requires a larger ``num_masks`` to converge. This approach is -more effective for classes with complex shapes. Advanced -------- @@ -724,9 +771,8 @@ Import ImageNet label names and add them to saliency maps If ``label_names`` are not provided to the explainer call, the saved -saliency map will have the predicted class index, not the name. For -example, ``image_name_target_206.jpg`` instead of -``image_name_target_retriever.jpg``. +saliency map will have the predicted class index, not the label name. +For example, ``206.jpg`` instead of ``retriever.jpg``. To conveniently view label names in saliency maps, we provide ImageNet label names information to the explanation call. @@ -781,8 +827,8 @@ label names information to the explanation call. # Adding ImageNet label names. explanation = explainer( image, - # Return saliency maps for 2 named labels - targets=["flat-coated_retriever", "microwave"], # Also label indices [206, 652] are possible as target + # Return saliency maps for 2 named labels, possible if label_names is provided + targets=["flat-coated_retriever", "microwave"], # slso label indices [206, 652] are possible as target label_names=imagenet_labels, ) @@ -797,8 +843,7 @@ label names information to the explanation call. .. code:: ipython3 # Save saliency map - output = base_artifacts_dir / "label_names" - explanation.save(output) + explanation.save(base_artifacts_dir, "label_names_") Below in ``base_artifacts_dir / "label_names"`` you can see saved saliency maps with label name on it: @@ -806,18 +851,18 @@ saliency maps with label name on it: .. code:: ipython3 # See saliency mas saved in `output` with predicted label in image name - for file_name in output.glob("*"): + for file_name in base_artifacts_dir.glob("label_names_*"): print(file_name) .. 
parsed-literal:: - artifacts/label_names/target_microwave.jpg - artifacts/label_names/target_flat-coated_retriever.jpg + artifacts/label_names_microwave.jpg + artifacts/label_names_flat-coated_retriever.jpg -Activation map explain method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Activation map XAI method +~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -840,11 +885,8 @@ saliency maps for each class, the activation map is returned as explain_method=xai.Method.ACTIVATIONMAP, ) - explanation = explainer(image, targets=-1, overlay=True) - activation_map = explanation.saliency_map["per_image_map"] - - plt.imshow(activation_map) - plt.show() + explanation = explainer(image, overlay=True) + explanation.plot() .. parsed-literal:: @@ -855,5 +897,5 @@ saliency maps for each class, the activation map is returned as -.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png +.. image:: explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png deleted file mode 100644 index e5049a64f6dbef..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_10_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e52cd26d8672300419d11ceda43b756f44961da4d0ed4ba1b907eb5223d4c546 -size 387941 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png new file mode 100644 index 00000000000000..a8fc791b3e4c52 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_11_2.png @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b1a955ec7a4a7394f905837b1a1686d3bb5130565eb9d4901eade821e6757c +size 387941 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png deleted file mode 100644 index b1b2f91a1c2a7d..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_21_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca8b8d022b4ebab307caa62d5de9be5553ee84492bcc1709a8267a3aba8f2374 -size 237061 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png new file mode 100644 index 00000000000000..8822fe615f6f19 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_22_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b40d8eb0d4a89c7c24a9cc676e3b4f298e5eabdb7a9d5a5604d4c8533dca7f +size 342254 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png deleted file mode 100644 index 4e780c1f4cba88..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_24_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26893793cf6015fdf551d8554731e41aad9a436bfe2df0fcdbf469b20d25eb22 -size 233040 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png 
b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png new file mode 100644 index 00000000000000..125556adbb530c --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_25_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354fa33f0a0e6becba16a5c65dde256282215e75a1e379500fd9e9d5fed7845e +size 235673 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png deleted file mode 100644 index 0f9120d6ab8b9d..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_32_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fd585e98c3b12072d9d2fd2b24f3e46e946f4a3771b10a7b987e3e126b218fb -size 336183 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png deleted file mode 100644 index 541cb8c169552e..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_33_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ae15d3b00ae8e36c86ffce352d92f24c370ca2948b53c06bc2bb9a9d3e73356 -size 51371 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png deleted file mode 100644 index f42dca24596405..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_0.png +++ 
/dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90c7bf014e9c04a3ed78cf6965ed82ae371fc7c4a18fd717190e1e5561f0dc0a -size 6162 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png new file mode 100644 index 00000000000000..104fcabc090172 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_34_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14efddd662af5ecae15f38d9fa20e001d7c1f1f26418d3a89ea0f489a5aee993 +size 312661 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png new file mode 100644 index 00000000000000..60fdb91b059005 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_35_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae625a36aeb18c1fd5f5b7e673b7e58836f8bf914b43906cb4e5c81cb33885f +size 62966 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png new file mode 100644 index 00000000000000..1c0f2dbbeb4f3a --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_36_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2596ddca7816d246bea422e0c9b809a41feaf65163d6c27c0b422ba6f16a440 +size 4947 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png 
b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png deleted file mode 100644 index 6ed4fcc4d48282..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_45_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fd21924f3fd3631e547d7b0254a148df812f50da4c784bdc4357ad8635e4cd7 -size 354123 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png new file mode 100644 index 00000000000000..2c5b7a96ca9399 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_49_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d33ea52b17c068fb6e2bff4ad0d4f0993e38a6a074a3ac1af3ccaefec2199d +size 326076 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png new file mode 100644 index 00000000000000..6fa958fe614823 --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_52_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41ea03631f0daa7400793e57138b4af52e13dc5294a3440688dd27a5034215e +size 324115 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png deleted file mode 100644 index 3f0c1df4b7e57c..00000000000000 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_57_1.png +++ 
/dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4b654a427e48742a5d20a15cf89fb3c7667123ce981c6e719299b42931e696e0 -size 336612 diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png new file mode 100644 index 00000000000000..98255daa2893ee --- /dev/null +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output_files/explainable-ai-2-deep-dive-with-output_63_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e60ccfb5582f57e813fbcd3ee55c35677f3cb391e7a4eb67b10319ee911f341 +size 314537 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst index 4a76c94a411d17..537ae36f6a331c 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst @@ -57,12 +57,11 @@ low-confident prediction, and wrong prediction. 
- `Select inference device <#select-inference-device>`__ - `Load the Model <#load-the-model>`__ - - `Define preprocess_fn and - postprocess_fn <#define-preprocess_fn-and-postprocess_fn>`__ + - `Define preprocess_fn <#define-preprocess_fn>`__ - `Explain <#explain>`__ - - `Create explainer <#create-explainer>`__ + - `Create Explainer object <#create-explainer-object>`__ - `Import ImageNet label names <#import-imagenet-label-names>`__ - `Explain using ImageNet labels <#explain-using-imagenet-labels>`__ @@ -110,7 +109,11 @@ Install requirements # Install openvino package %pip install -q "openvino>=2024.2.0" opencv-python tqdm - %pip install -q --no-deps "openvino-xai>=1.0.0" + + # Install openvino xai package + %pip install -q --no-deps "openvino-xai>=1.1.0" + %pip install -q -U "numpy==1.*" + %pip install -q scipy if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" @@ -184,12 +187,6 @@ classify since they’re all dog breeds. image_folder_path = data_folder / "imagewoof320" / "imagewoof320" - -.. parsed-literal:: - - Dataset is already downloaded to artifacts and extracted. - - .. code:: ipython3 # Create list of images to explain @@ -236,12 +233,6 @@ scaling and normalization with certain values. else: print(f"{model_name} already downloaded to {base_artifacts_dir}") - -.. parsed-literal:: - - mobilenetv3_large_100.ra_in1k already downloaded to artifacts - - Prepare model to run inference ------------------------------ @@ -260,15 +251,6 @@ select device from dropdown list for running inference using OpenVINO device - - - -.. 
parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Load the Model ~~~~~~~~~~~~~~ @@ -281,17 +263,17 @@ Load the Model model = core.read_model(model=model_xml_path) compiled_model = core.compile_model(model=model, device_name=device.value) -Define preprocess_fn and postprocess_fn -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Define ``preprocess_fn`` +~~~~~~~~~~~~~~~~~~~~~~~~ -To run model inference, you need to define functions to preprocess data -and postprocess the results based on the model’s implementation. Since -the used model is originally from `timm -storage `__, we -need to apply specific timm preprocessing, including normalization and -scaling with certain values. +This notebook uses ``WHITEBOX`` mode for model explanation - it is +required to define a function to preprocess data (the alternative is to +preprocess input data). Since the used model is originally from `timm +storage `__, it is +required to apply specific timm preprocessing, including normalization +and scaling with certain values. .. code:: ipython3 @@ -312,40 +294,22 @@ scaling with certain values. # Add batch dimension x = np.expand_dims(x, 0) return x - - - def postprocess_fn(x: np.ndarray) -> np.ndarray: - """ - Process model prediction - """ - prediction_processed = softmax(x) - # Remove batch dimention - return prediction_processed[0] - - - def softmax(x): - """Compute softmax values of x.""" - e_x = np.exp(x - np.max(x)) - return e_x / e_x.sum() Explain ------- -Create explainer -~~~~~~~~~~~~~~~~ - - +Create ``Explainer`` object +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``explainer`` can internally apply pre-processing during model -inference, allowing raw images as input. -To enable this, define ``preprocess_fn`` and provide it to the explainer -constructor. In cases where we pass multiple unprocessed images, as done -in this notebook, we need to define ``preprocess_fn``. -If it’s not defined, it is assumed that the input is preprocessed. 
+The ``Explainer`` object can internally apply pre-processing during +model inference, allowing raw images as input. To enable this, define +``preprocess_fn`` and provide it to the explainer constructor. If +``preprocess_fn`` is not defined, it is assumed that the input is +preprocessed. .. code:: ipython3 @@ -375,11 +339,10 @@ Import ImageNet label names If ``label_names`` are not provided to the explainer call, the saved saliency map will have the predicted class index, not the name. For -example, ``image_name_target_167.jpg`` instead of -``image_name_target_English_foxhound.jpg``. +example, ``167.jpg`` instead of ``English_foxhound.jpg``. -To conveniently view label names in saliency maps, we provide ImageNet -label names information to the explanation call. +To conveniently view label names in saliency maps, we prepare and +provide ImageNet label names information to the explanation call. .. code:: ipython3 @@ -430,10 +393,10 @@ to the explainer. targets=[ "flat-coated_retriever", "Samoyed", - ], # Also label indices [206, 258] are possible as target + ], # also label indices [206, 258] are possible as target label_names=imagenet_labels, ) - explanation.save(output, Path(image_path).stem) + explanation.save(output, f"{Path(image_path).stem}_") # pass prefix name with underscore Below in ``base_artifacts_dir / "saliency_maps" / "multiple_images"`` you can see saved saliency maps: @@ -447,8 +410,8 @@ you can see saved saliency maps: .. 
parsed-literal:: - artifacts/saliency_maps/multiple_images/n02105641_2491_target_flat-coated_retriever.jpg - artifacts/saliency_maps/multiple_images/n02105641_2491_target_Samoyed.jpg + artifacts/saliency_maps/multiple_images/n02088364_5768_Samoyed.jpg + artifacts/saliency_maps/multiple_images/n02088364_5768_flat-coated_retriever.jpg Notable use cases in ImageWoof dataset @@ -544,6 +507,20 @@ The cell below contains paths to images with those respective use cases: print(f"Predicted class {imagenet_labels[index]}, index {index}, probability: {score:.2f}") return result_infer, result_idxs, result_scores + + + def postprocess_fn(x: np.ndarray) -> np.ndarray: + """ + Process model prediction + """ + prediction_processed = softmax(x) + return prediction_processed[0] # remove batch dimension + + + def softmax(x): + """Compute softmax values of x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() Explain for each use case ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -573,22 +550,22 @@ Explain for each use case explanation = explainer( image, - targets=result_idxs, # Return saliency maps for predicted classes + targets=result_idxs, # return saliency maps for predicted classes label_names=imagenet_labels, overlay=True, ) - # Save saliency maps, use detailed implementation instead of `explanation.save` - # to return predicted scores for saliency maps as well + saliency_map_name_prefix = f"{image_name}_{gt_info}_pr_" + saliency_map_name_postfix = "_" + confidence_scores = {} for idx, score in zip(result_idxs, result_scores): - target_name = imagenet_labels[idx] - cv2.imwrite( - os.path.join( - output / use_case, - f"{image_name}_{gt_info}_pr_{target_name}_{score:.2f}.jpg", - ), - img=explanation.saliency_map[idx], - ) + confidence_scores[idx] = score + explanation.save( + dir_path=(output / use_case), + prefix=saliency_map_name_prefix, + postfix=saliency_map_name_postfix, + confidence_scores=confidence_scores, + ) .. 
parsed-literal:: @@ -628,30 +605,30 @@ Explain for each use case True_positive_high_confidence - n02111889_17737_gt_Samoyed_0.94_pr_Samoyed_0.94 - n02099601_6505_gt_golden retriever_0.88_pr_golden_retriever_0.88 n02088364_2019_gt_beagle_0.97_pr_beagle_0.97 + n02099601_6505_gt_golden retriever_0.88_pr_golden_retriever_0.88 n02105641_817_gt_Old English sheepdog_0.96_pr_Old_English_sheepdog_0.96 + n02111889_17737_gt_Samoyed_0.94_pr_Samoyed_0.94 True_positive_low_confidence - n02086240_1422_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 - n02086240_3709_gt_Shih-Tzu_0.20_pr_Shih-Tzu_0.20 n02099601_7942_gt_golden retriever_0.18_pr_golden_retriever_0.18 + n02086240_3709_gt_Shih-Tzu_0.20_pr_Shih-Tzu_0.20 + n02086240_1422_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 n02086240_1765_gt_Shih-Tzu_0.18_pr_Shih-Tzu_0.18 False_positive_high_confidence n02088364_12304_gt_beagle_0.01_pr_car_mirror_0.82 - n02111889_14926_gt_Samoyed_0.03_pr_Arctic_fox_0.95 - n02111889_1931_gt_Samoyed_0.07_pr_dogsled_0.79 - n02115641_5752_gt_dingo_0.02_pr_Chihuahua_0.93 + n02088364_2430_gt_beagle_0.00_pr_bannister_0.78 n02099601_4933_gt_golden retriever_0.05_pr_bubble_0.79 n02096294_2323_gt_Australian terrier_0.00_pr_quilt_0.80 - n02088364_2430_gt_beagle_0.00_pr_bannister_0.78 + n02115641_5752_gt_dingo_0.02_pr_Chihuahua_0.93 + n02111889_1931_gt_Samoyed_0.07_pr_dogsled_0.79 n02087394_6357_gt_Rhodesian ridgeback_0.00_pr_dalmatian_0.98 + n02111889_14926_gt_Samoyed_0.03_pr_Arctic_fox_0.95 True_positive_two_predictions - n02111889_374_gt_Samoyed_0.43_pr_Samoyed_0.43 n02099601_634_gt_golden retriever_0.30_pr_golden_retriever_0.30 + n02111889_374_gt_Samoyed_0.43_pr_Samoyed_0.43 n02099601_634_gt_golden retriever_0.30_pr_Labrador_retriever_0.57 n02111889_374_gt_Samoyed_0.43_pr_crib_0.39 @@ -693,6 +670,7 @@ of pictures, their names, and the confidence of predictions: for image_path, ax in zip(image_paths, axs): image_sal_map = cv2.imread(f"{use_case_output_dir}/{image_path}") + image_sal_map = cv2.cvtColor(image_sal_map, 
cv2.COLOR_BGR2RGB) image_name = Path(image_path).stem image_name = image_name.replace("_target", "") diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png index f149c148287d67..1a9f33c3368b17 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_50_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff5a5cd559b24fdfae9eb238438d8e1e65be9c5878d2d8a48551038a2175dd90 -size 935667 +oid sha256:cbb403d4ab869af3d0d82a3a7980192f2da68c7df2fe27ebd34340465592f46e +size 974504 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png index 281a37e1ff476e..62018be36e4c3a 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_53_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:056291c27310ffbff98af36276ead5fa216a04e6f55507e0559a86e277783499 -size 893683 +oid sha256:ad8213f746e218068621812a53f4ec4337fb1d0f8fcc763566e5f9e4251cbcb2 +size 917046 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png 
index 0a23b9ac833d18..40b6043e87691d 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_56_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b05bbfa7b014ab362a42ba8cf97c4604fd486b43b76e365802681fb3c2678d6 -size 673321 +oid sha256:12061dc812cec3219863c5936441baa8ce5b86015acf978ca3d4dcf1212b9c02 +size 681815 diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png index 5ca94c55195397..a04779fd42ed48 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output_files/explainable-ai-3-map-interpretation-with-output_59_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e61cbfbf2fbac5b143f72a31d5de6ecabb6c6e705995129966becbb09bfa0cec -size 715492 +oid sha256:3f48b01dde2edffcea65f08fafe13ff158314f7e75534bbc5d7af027cbc43f5e +size 746506 diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 65bd0c194a8116..e0f20e0f79974b 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -77,8 +77,8 @@ Install requirements .. 
code:: ipython3 - %pip install -q "ultralytics==8.2.24" "onnx<1.16.2" tqdm --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino-dev>=2024.0.0" + %pip install -q "ultralytics==8.2.24" "matplotlib>=3.4" "onnx<1.16.2" tqdm --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.4.0" %pip install -q "nncf>=2.9.0" %pip install -q "gradio>=4.13" @@ -158,7 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 67.6MB/s] + 100%|██████████| 138M/138M [00:02<00:00, 67.7MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 612.7ms - Speed: 3.0ms preprocess, 612.7ms inference, 794.5ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 728.3ms + Speed: 3.1ms preprocess, 728.3ms inference, 768.2ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -214,10 +214,10 @@ tracing. The FastSAM model itself is based on YOLOv8 model. PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
- OpenVINO: export success ✅ 6.0s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) + OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.0s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.1s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.1ms - Speed: 5.7ms preprocess, 498.1ms inference, 31.2ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 504.9ms + Speed: 5.8ms preprocess, 504.9ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -615,8 +615,8 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 70 seconds. - Resulting in 1.83 fps + Segmented in 69 seconds. + Resulting in 1.86 fps .. code:: ipython3 @@ -643,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 21 seconds - Resulting in 6.1 fps - That is 3.33 times faster! + Segmented in 22 seconds + Resulting in 5.82 fps + That is 3.14 times faster! 
Try out the converted pipeline diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index a09f2a1ea60399..e929a95fb182c1 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -51,20 +51,12 @@ Prerequisites .. code:: ipython3 - import platform - - %pip install -q "openvino>=2024.3.0" "einops" "torch>2.1" "torchvision" "timm>=0.9.8" "transformers>=4.41" "pillow" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.3.0" "einops" "torch>2.1" "torchvision" "matplotlib>=3.4" "timm>=0.9.8" "transformers>=4.41" "pillow" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -108,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-10-08 02:10:50.200273: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:10:50.234398: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:28:54.034484: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-05 01:28:54.069316: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:10:50.883345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:28:54.728430: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -201,49 +193,49 @@ pipeline. .. parsed-literal:: - config.json: 0%| | 0.00/2.43k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False diff --git a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png index a1484a3eb4b6d0..37d11a47fd30c9 100644 --- a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png +++ b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5808e660c685e4518f0a83232c2b9c9e88263e05ecd05891f769d263df4c642 -size 259656 +oid sha256:d85b3df68708172ed849a9e182bdec6a94f0174643833bd8cc7184ac0d090fae +size 259636 diff --git a/docs/notebooks/flux.1-image-generation-with-output.rst b/docs/notebooks/flux.1-image-generation-with-output.rst index 09a05c0e73bf8d..fe0c47899c3601 100644 --- a/docs/notebooks/flux.1-image-generation-with-output.rst +++ b/docs/notebooks/flux.1-image-generation-with-output.rst @@ -26,7 +26,11 @@ using OpenVINO. - `Prerequisites <#prerequisites>`__ - `Select model <#select-model>`__ - `Convert model with OpenVINO <#convert-model-with-openvino>`__ -- `Compress model weights <#compress-model-weights>`__ + + - `Convert model using Optimum + Intel <#convert-model-using-optimum-intel>`__ + - `Compress model weights <#compress-model-weights>`__ + - `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Interactive demo <#interactive-demo>`__ @@ -47,26 +51,19 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "gradio>=4.19" "torch>=2.1" "transformers" "nncf>=2.12.0" "diffusers>=0.30.0" "opencv-python" "pillow" "peft>=0.7.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "gradio>=4.19" "torch>=2.1" "transformers" "nncf>=2.12.0" "diffusers>=0.31.0" "opencv-python" "pillow" "peft>=0.7.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "sentencepiece" "protobuf" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" %pip install -qU "openvino>=2024.4.0" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import requests from pathlib import Path - if not Path("flux_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/flux.1-image-generation/flux_helper.py") - open("flux_helper.py", "w").write(r.text) + if not Path("cmd_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") + open("cmd_helper.py", "w").write(r.text) if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/flux.1-image-generation/gradio_helper.py") @@ -108,20 +105,20 @@ FLUX.1-dev version using widget bellow. .. code:: ipython3 - from flux_helper import get_model_selector + import ipywidgets as widgets + + model_ids = ["black-forest-labs/FLUX.1-schnell", "black-forest-labs/FLUX.1-dev"] + + model_selector = widgets.Dropdown( + options=model_ids, + default=model_ids[0], + description="Model:", + ) + - model_selector = get_model_selector() model_selector -.. 
parsed-literal:: - - 2024-10-08 02:11:42.908018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:11:42.941481: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:11:43.614104: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: @@ -171,101 +168,44 @@ The pipeline consists of four important parts: - Transformer for step-by-step denoising latent image representation. - Autoencoder (VAE) for decoding latent space to image. -We will use ``convert_flux`` helper function defined in -`flux_helper.py `__ that create original PyTorch model -and convert each part of pipeline using ``ov.convert_model``. - -.. code:: ipython3 - - from flux_helper import convert_flux - - # uncomment the line to see model conversion code - # ??convert_flux - -.. code:: ipython3 - - model_dir = convert_flux(model_selector.value) - - +Convert model using Optimum Intel +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00`__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. -.. parsed-literal:: +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. 
``optimum-cli`` provides command line interface for +model conversion and optimization. - Loading checkpoint shards: 0%| | 0/2 [00:00 --task - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4779: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if past_key_values_length > 0: - - -.. parsed-literal:: - - ✅ Clip Text encoder conversion finished - ⌛ T5 Text encoder conversion started - ✅ T5 Text encoder conversion finished - ⌛ VAE decoder conversion started - - -.. 
parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if hidden_states.shape[0] >= 64: - - -.. parsed-literal:: - - ✅ VAE decoder onversion finished - ✅ black-forest-labs/FLUX.1-schnell successfully converted and can be found in FLUX.1-schnell - - -.. code:: ipython3 - - from flux_helper import TRANSFORMER_PATH, VAE_DECODER_PATH, TEXT_ENCODER_PATH, TEXT_ENCODER_2_PATH - - model_dict = { - "transformer": model_dir / TRANSFORMER_PATH, - "text_encoder": model_dir / TEXT_ENCODER_PATH, - "text_encoder_2": model_dir / TEXT_ENCODER_2_PATH, - "vae": model_dir / VAE_DECODER_PATH, - } +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4 +`nncf `__ will be used for +weight compression.
More details about model export provided in `Optimum +Intel +documentation `__. Compress model weights ----------------------- +~~~~~~~~~~~~~~~~~~~~~~ @@ -278,14 +218,16 @@ where the size of weights is relatively larger than the size of activations, for example, Large Language Models (LLM). Compared to INT8 compression, INT4 compression improves performance even more, but introduces a minor drop in prediction quality. We will use -`NNCF `__ for weight -compression. +`NNCF `__ integration to +``optimum-cli`` tool for weight compression. .. code:: ipython3 - from flux_helper import weight_compression_widget - - to_compress = weight_compression_widget() + to_compress = widgets.Checkbox( + value=True, + description="Weight compression", + disabled=False, + ) to_compress @@ -300,229 +242,99 @@ compression. .. code:: ipython3 - import nncf - import openvino as ov - import gc + from pathlib import Path - compression_args = {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 1.0} + model_id = model_selector.value - int4_model_dict = {} + model_base_dir = Path(model_id.split("/")[-1]) + additional_args = {} if to_compress.value: - core = ov.Core() - - for model_name, model_path in model_dict.items(): - int4_path = model_path.parent / (model_path.stem + "_int4.xml") - if not int4_path.exists(): - print(f"⌛ {model_path.stem} compression started") - print( - f"Compression parameters:\n\tmode = {compression_args['mode']}\n\tratio = {compression_args['ratio']}\n\tgroup_size = {compression_args['group_size']}" - ) - model = core.read_model(model_path) - compressed_model = nncf.compress_weights(model, **compression_args) - ov.save_model(compressed_model, int4_path) - print(f"✅ {model_path.stem} compression finished") - del compressed_model - del model - gc.collect() - print(f"Compressed {model_path.stem} can be found in {int4_path}") - int4_model_dict[model_name] = int4_path - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino - ⌛ transformer compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 0% (1 / 502) │ 0% (0 / 501) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 100% (501 / 502) │ 100% (501 / 501) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: + model_dir = model_base_dir / "INT4" + additional_args.update({"weight-format": "int4", "group-size": "64", "ratio": "1.0"}) + else: + model_dir = model_base_dir / "FP16" + additional_args.update({"weight-format": "fp16"}) - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ transformer compression finished - Compressed transformer can be found in FLUX.1-schnell/transformer/transformer_int4.xml - ⌛ text_encoder compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 33% (3 / 74) │ 0% (0 / 71) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 67% (71 / 74) │ 100% (71 / 71) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. 
parsed-literal:: - - ✅ text_encoder compression finished - Compressed text_encoder can be found in FLUX.1-schnell/text_encoder/text_encoder_int4.xml - ⌛ text_encoder_2 compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 4% (3 / 170) │ 0% (0 / 167) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 96% (167 / 170) │ 100% (167 / 167) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ text_encoder_2 compression finished - Compressed text_encoder_2 can be found in FLUX.1-schnell/text_encoder_2/text_encoder_2_int4.xml - ⌛ vae_decoder compression started - Compression parameters: - mode = int4_sym - ratio = 1.0 - group_size = 64 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 98% (36 / 39) │ 0% (0 / 3) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 2% (3 / 39) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - ✅ vae_decoder compression finished - Compressed vae_decoder can be found in FLUX.1-schnell/vae/vae_decoder_int4.xml +.. 
code:: ipython3 + from cmd_helper import optimum_cli + + if not model_dir.exists(): + optimum_cli(model_id, model_dir, additional_args=additional_args) Run OpenVINO model inference ---------------------------- -``OVFluxPipeline`` class defined in ``flux_helper.py`` provides -convenient way for running model. It accepts directory with converted -model and inference device as arguments. +``OVDiffusionPipeline`` from Optimum Intel provides ready-to-use +interface for running Diffusers models using OpenVINO. It supports +various models including Stable Diffusion, Stable Diffusion XL, LCM, +Stable Diffusion v3 and Flux. Similar to original Diffusers pipeline, +for initialization, we should use ``from_pretrained`` method providing +model id from HuggingFace hub or local directory (both original PyTorch +and OpenVINO models formats supported, in the first case model class +additionally will trigger model conversion). .. code:: ipython3 - from flux_helper import get_pipeline_selection_option + from notebook_utils import device_widget - use_compressed = get_pipeline_selection_option(int4_model_dict) - use_compressed + device = device_widget(default="CPU", exclude=["NPU"]) + device .. parsed-literal:: - Checkbox(value=True, description='Use compressed models') + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') .. code:: ipython3 - from flux_helper import OVFluxPipeline, init_pipeline # noqa: F401 + import ipywidgets as widgets - # uncomment the line to see model pipeline - # ??OVFluxPipeline - -.. code:: ipython3 - - from notebook_utils import device_widget + model_available = (model_base_dir / "INT4").is_dir() + use_quantized_models = widgets.Checkbox( + value=model_available, + description="Use compressed models", + disabled=not model_available, + ) - device = device_widget(default="CPU", exclude=["NPU"]) - device + use_quantized_models ..
parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + Checkbox(value=True, description='Use compressed models') .. code:: ipython3 - ov_pipe = init_pipeline(model_dir, model_dict if not use_compressed.value else int4_model_dict, device.value) + from optimum.intel.openvino import OVDiffusionPipeline + + model_dir = model_base_dir / "INT4" if use_quantized_models.value else model_base_dir / "FP16" + + ov_pipe = OVDiffusionPipeline.from_pretrained(model_dir, device=device.value) .. parsed-literal:: - Models compilation - ✅ transformer - Done! - ✅ text_encoder - Done! - ✅ text_encoder_2 - Done! - ✅ vae - Done! - + 2024-10-28 18:12:30.714636: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-28 18:12:30.727116: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1730124750.741387 52454 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1730124750.745955 52454 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-10-28 18:12:30.761443: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + You set `add_prefix_space`. 
The tokenizer needs to be converted from the slow tokenizers + .. code:: ipython3 @@ -544,7 +356,7 @@ model and inference device as arguments. -.. image:: flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png +.. image:: flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png @@ -565,20 +377,6 @@ Interactive demo # demo.launch(share=True) # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - + demo.launch(debug=True, share=True) diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.jpg b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.jpg new file mode 100644 index 00000000000000..6ad8e593ef5115 --- /dev/null +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5caeca2e3ae8e99146bb786321cbad04584523b693e3cbdba2a5f8c7f0dbb096 +size 13490 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png new file mode 100644 index 00000000000000..7cb1e2903a902f --- /dev/null +++ b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_16_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de23e409e068f39c1e0e568b921174c80b0c42e29a1473eeeba54a91b88aaef +size 113356 diff --git 
a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg deleted file mode 100644 index d4dcc2dcc6d8b2..00000000000000 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8160a0e39a306d7337c724b69f447f1b3aca95eaf93de866194e65678c0d01ba -size 14210 diff --git a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png b/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png deleted file mode 100644 index b7d17c76ea39b4..00000000000000 --- a/docs/notebooks/flux.1-image-generation-with-output_files/flux.1-image-generation-with-output_20_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:81d23cd276ecf3af1eda5b11e80510737046dd245e3d295965a23f596f4b99f5 -size 115703 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index 73db954afdfbbb..fe2ac780f5cca6 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -104,7 +104,7 @@ Check if FreeVC is installed and append its path to ``sys.path`` remote: Counting objects: 100% (74/74), done. remote: Compressing objects: 100% (47/47), done. remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) - Receiving objects: 100% (131/131), 15.28 MiB | 17.35 MiB/s, done. + Receiving objects: 100% (131/131), 15.28 MiB | 17.50 MiB/s, done. Resolving deltas: 100% (43/43), done. @@ -134,8 +134,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` Downloading... 
From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:28<00:00, 44.0MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:32<00:00, 38.5MB/s] .. code:: ipython3 @@ -239,13 +239,13 @@ Models initialization .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") .. parsed-literal:: - Loaded the voice encoder model on cpu in 0.00 seconds. + Loaded the voice encoder model on cpu in 0.01 seconds. Reading dataset settings @@ -288,7 +288,7 @@ Inference .. parsed-literal:: - 2it [00:03, 1.90s/it] + 2it [00:04, 2.03s/it] Result audio files should be available in ‘outputs/freevc’ @@ -360,13 +360,13 @@ Converting to OpenVINO’s IR format. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] @@ -581,12 +581,12 @@ function to OpenVINO IR format. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 25912 / 25920 (100.0%) - Greatest absolute difference: 1.0370872616767883 at index (0, 0, 18175) (up to 1e-05 allowed) - Greatest relative difference: 8656.24884080371 at index (0, 0, 11526) (up to 1e-05 allowed) + Mismatched elements: 25915 / 25920 (100.0%) + Greatest absolute difference: 1.3485908806324005 at index (0, 0, 24258) (up to 1e-05 allowed) + Greatest relative difference: 8204.075456053068 at index (0, 0, 5777) (up to 1e-05 allowed) _check_trace( @@ -645,7 +645,7 @@ And now we can check inference using only IR models. .. 
parsed-literal:: - 2it [00:01, 1.29it/s] + 2it [00:01, 1.31it/s] Result audio files should be available in ‘outputs/freevc’ and you can @@ -707,7 +707,7 @@ Result audio: diff --git a/docs/notebooks/gpu-device-with-output.rst b/docs/notebooks/gpu-device-with-output.rst index 2b35846b766bd1..732cc297aa9531 100644 --- a/docs/notebooks/gpu-device-with-output.rst +++ b/docs/notebooks/gpu-device-with-output.rst @@ -21,13 +21,7 @@ Working with GPUs in OpenVINO™ - `Compiling a Model on GPU <#compiling-a-model-on-gpu>`__ - - `Download and Convert a Model <#download-and-convert-a-model>`__ - - - `Download and unpack the - Model <#download-and-unpack-the-model>`__ - - `Convert the Model to OpenVINO IR - format <#convert-the-model-to-openvino-ir-format>`__ - + - `Download a Model <#download-a-model>`__ - `Compile with Default Configuration <#compile-with-default-configuration>`__ - `Reduce Compile Time through Model @@ -119,10 +113,7 @@ Install required packages .. code:: ipython3 - %pip install -q "openvino-dev>=2024.0.0" "opencv-python" "tqdm" - %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 - %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 - %pip install -q "tensorflow>=2.5; sys_platform != 'darwin' and python_version > '3.8'" + %pip install -q "openvino>=2024.4.0" "opencv-python" "tqdm" "huggingface_hub" Checking GPUs with Query Device ------------------------------- @@ -305,8 +296,8 @@ properties. We can easily use one for compiling and running models with OpenVINO `GPU plugin `__. -Download and Convert a Model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Download a Model +~~~~~~~~~~~~~~~~ @@ -317,19 +308,9 @@ was trained on `Common Objects in Context categories of object. For details, see the `paper `__. 
-Download and unpack the Model -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Use the ``download_file`` function from the ``notebook_utils`` to -download an archive with the model. It automatically creates a directory -structure and downloads the selected model. This step is skipped if the -package is already downloaded. - .. code:: ipython3 - import tarfile + import huggingface_hub as hf_hub from pathlib import Path # Fetch `notebook_utils` module @@ -340,95 +321,18 @@ package is already downloaded. ) open("notebook_utils.py", "w").write(r.text) - from notebook_utils import download_file # A directory where the model will be downloaded. base_model_dir = Path("./model").expanduser() - model_name = "ssdlite_mobilenet_v2" - archive_name = Path(f"{model_name}_coco_2018_05_09.tar.gz") - - # Download the archive - downloaded_model_path = base_model_dir / archive_name - if not downloaded_model_path.exists(): - model_url = f"http://download.tensorflow.org/models/object_detection/{archive_name}" - download_file(model_url, downloaded_model_path.name, downloaded_model_path.parent) - - # Unpack the model - tf_model_path = base_model_dir / archive_name.with_suffix("").stem / "frozen_inference_graph.pb" - if not tf_model_path.exists(): - with tarfile.open(downloaded_model_path) as file: - file.extractall(base_model_dir) - - - -.. parsed-literal:: - - model/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz: 0%| | 0.00/48.7M [00:00`__. - -.. code:: ipython3 - - from openvino.tools.mo.front import tf as ov_tf_front - - precision = "FP16" + model_name = "ssdlite_mobilenet_v2_fp16" - # The output path for the conversion. 
- model_path = base_model_dir / "ir_model" / f"{model_name}_{precision.lower()}.xml" + ov_model_path = base_model_dir / model_name / f"{model_name}.xml" - trans_config_path = Path(ov_tf_front.__file__).parent / "ssd_v2_support.json" - pipeline_config = base_model_dir / archive_name.with_suffix("").stem / "pipeline.config" + if not (ov_model_path).exists(): + hf_hub.snapshot_download("katuni4ka/ssdlite_mobilenet_v2_fp16", local_dir=base_model_dir) - model = None - if not model_path.exists(): - model = ov.tools.mo.convert_model( - input_model=tf_model_path, - input_shape=[1, 300, 300, 3], - layout="NHWC", - transformations_config=trans_config_path, - tensorflow_object_detection_api_pipeline_config=pipeline_config, - reverse_input_channels=True, - ) - ov.save_model(model, model_path, compress_to_fp16=(precision == "FP16")) - print("IR model saved to {}".format(model_path)) - else: - print("Read IR model from {}".format(model_path)) - model = core.read_model(model_path) - - -.. parsed-literal:: - - [ WARNING ] The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if applicable) are kept. - - -.. 
parsed-literal:: - - IR model saved to model/ir_model/ssdlite_mobilenet_v2_fp16.xml - + model = core.read_model(ov_model_path) Compile with Default Configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -484,7 +388,7 @@ following: core.set_property({props.cache_dir(): cache_folder}) # Compile the model as before - model = core.read_model(model=model_path) + model = core.read_model(ov_model_path) compiled_model = core.compile_model(model, device) print(f"Cache enabled (first time) - compile time: {time.time() - start}s") @@ -502,13 +406,13 @@ compile times with caching enabled and disabled as follows: start = time.time() core = ov.Core() core.set_property({props.cache_dir(): "cache"}) - model = core.read_model(model=model_path) + model = core.read_model(model=ov_model_path) compiled_model = core.compile_model(model, device) print(f"Cache enabled - compile time: {time.time() - start}s") start = time.time() core = ov.Core() - model = core.read_model(model=model_path) + model = core.read_model(ov_model_path) compiled_model = core.compile_model(model, device) print(f"Cache disabled - compile time: {time.time() - start}s") diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index ee6741ff7af4e7..232629422b14e0 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -124,16 +124,16 @@ segmentation you can select vanilla ``SAM``. Cloning into 'GroundingDINO'... remote: Enumerating objects: 379, done. remote: Counting objects: 100% (190/190), done. - remote: Compressing objects: 100% (80/80), done. - remote: Total 379 (delta 135), reused 110 (delta 110), pack-reused 189 (from 1) - Receiving objects: 100% (379/379), 14.03 MiB | 19.90 MiB/s, done. + remote: Compressing objects: 100% (79/79), done. 
+ remote: Total 379 (delta 136), reused 111 (delta 111), pack-reused 189 (from 1) + Receiving objects: 100% (379/379), 14.03 MiB | 20.95 MiB/s, done. Resolving deltas: 100% (194/194), done. Cloning into 'EfficientSAM'... remote: Enumerating objects: 424, done. remote: Counting objects: 100% (85/85), done. remote: Compressing objects: 100% (33/33), done. remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 25.51 MiB/s, done. + Receiving objects: 100% (424/424), 262.14 MiB | 24.44 MiB/s, done. Resolving deltas: 100% (246/246), done. @@ -222,6 +222,11 @@ GroundingDINO imports .. parsed-literal:: + 2024-11-05 01:34:53.765709: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:34:53.988314: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 01:34:54.760718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers UserWarning: Failed to load custom C++ ops. Running on CPU mode Only! @@ -361,6 +366,30 @@ Convert GroundingDINO to OpenVINO IR format TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + + +.. parsed-literal:: + + output layer_id 0 is nan + num_nan 230400, num_inf 0 + output layer_id 1 is nan + num_nan 230400, num_inf 0 + output layer_id 2 is nan + num_nan 230400, num_inf 0 + output layer_id 3 is nan + num_nan 230400, num_inf 0 + output layer_id 4 is nan + num_nan 230400, num_inf 0 + output layer_id 5 is nan + num_nan 230400, num_inf 0 + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. Run OpenVINO optimized GroundingDINO @@ -503,15 +532,6 @@ class, but the inference will be done using OpenVINO optimized model. 
boxes_filt, pred_phrases, logits_filt = get_ov_grounding_output(ov_compiled_grounded_dino, pil_image, classes_prompt, BOX_THRESHOLD, TEXT_THRESHOLD) - -.. parsed-literal:: - - 2024-10-08 02:28:09.725059: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:28:09.764729: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:28:10.354526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - Convert predicted boxes to supervision box detections format .. code:: ipython3 @@ -572,11 +592,6 @@ segmentation. First of all let’s convert ``SAM`` model to OpenVINO IR. ov_efficient_sam = core.read_model(ov_efficient_sam_path) -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - .. parsed-literal:: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
diff --git a/docs/notebooks/handwritten-ocr-with-output.rst b/docs/notebooks/handwritten-ocr-with-output.rst index 9f03d782ed2235..a66f73f07d99b4 100644 --- a/docs/notebooks/handwritten-ocr-with-output.rst +++ b/docs/notebooks/handwritten-ocr-with-output.rst @@ -49,21 +49,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + # Install openvino package + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports diff --git a/docs/notebooks/hello-detection-with-output.rst b/docs/notebooks/hello-detection-with-output.rst index 85dff2edc5cb31..d9293f8da61279 100644 --- a/docs/notebooks/hello-detection-with-output.rst +++ b/docs/notebooks/hello-detection-with-output.rst @@ -41,7 +41,7 @@ Guide =2023.1.0" opencv-python tqdm + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: diff --git a/docs/notebooks/hello-segmentation-with-output.rst b/docs/notebooks/hello-segmentation-with-output.rst index 6f55f7666710cc..6ddc0e3b0aa78b 100644 --- a/docs/notebooks/hello-segmentation-with-output.rst +++ b/docs/notebooks/hello-segmentation-with-output.rst @@ -35,21 +35,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -196,7 +188,7 @@ is provided. .. parsed-literal:: - + @@ -223,7 +215,7 @@ Do Inference .. 
parsed-literal:: - + diff --git a/docs/notebooks/hello-world-with-output.rst b/docs/notebooks/hello-world-with-output.rst index 23ea9c15b85df0..5bd1216db29701 100644 --- a/docs/notebooks/hello-world-with-output.rst +++ b/docs/notebooks/hello-world-with-output.rst @@ -37,23 +37,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - - + # Install required packages + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports diff --git a/docs/notebooks/hugging-face-hub-with-output.rst b/docs/notebooks/hugging-face-hub-with-output.rst index b13205541f558f..a92f8cd18fba31 100644 --- a/docs/notebooks/hugging-face-hub-with-output.rst +++ b/docs/notebooks/hugging-face-hub-with-output.rst @@ -132,6 +132,10 @@ tutorials `__. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - 2024-10-08 02:29:42.074725: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:29:42.108881: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-08 02:29:42.698091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Initialize and Convert the Model Automatically using OVModel class @@ -372,23 +379,9 @@ inference run. .. parsed-literal:: - Framework not specified. Using pt to export the model. Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - Compiling the model to AUTO ... Convert model using Optimum CLI interface @@ -443,7 +436,7 @@ Full list of supported arguments available via ``--help`` .. 
parsed-literal:: - 2024-10-08 02:29:55.851395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:12.161579: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] [--weight-format {fp32,fp16,int8,int4,mxfp4}] @@ -474,20 +467,20 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: - ['image-classification', 'depth-estimation', 'object- - detection', 'zero-shot-object-detection', 'text-to- - audio', 'token-classification', 'audio-frame- - classification', 'semantic-segmentation', 'automatic- - speech-recognition', 'image-segmentation', 'question- - answering', 'text-classification', 'multiple-choice', - 'inpainting', 'masked-im', 'text2text-generation', - 'image-to-text', 'text-generation', 'sentence- - similarity', 'mask-generation', 'text-to-image', - 'audio-xvector', 'zero-shot-image-classification', - 'fill-mask', 'image-to-image', 'feature-extraction', - 'audio-classification']. For decoder models, use `xxx- - with-past` to export the model using past key values - in the decoder. 
+ ['mask-generation', 'image-classification', 'fill- + mask', 'audio-xvector', 'audio-frame-classification', + 'sentence-similarity', 'multiple-choice', 'automatic- + speech-recognition', 'text-to-image', 'token- + classification', 'image-to-text', 'image- + segmentation', 'question-answering', 'depth- + estimation', 'semantic-segmentation', 'feature- + extraction', 'text-generation', 'zero-shot-object- + detection', 'text-to-audio', 'zero-shot-image- + classification', 'object-detection', 'text2text- + generation', 'audio-classification', 'image-to-image', + 'masked-im', 'inpainting', 'text-classification']. For + decoder models, use `xxx-with-past` to export the + model using past key values in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment. @@ -592,16 +585,11 @@ compression: .. parsed-literal:: - 2024-10-08 02:30:01.388483: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Framework not specified. Using pt to export the model. + 2024-11-05 01:37:17.680673: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
- Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - OpenVINO Tokenizers is not available. To deploy models in production with C++ code, please follow installation instructions: https://github.com/openvinotoolkit/openvino_tokenizers?tab=readme-ov-file#installation - + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Tokenizer won't be converted. @@ -612,12 +600,6 @@ be loaded using the same OVModelForXXX class. model = OVModelForSequenceClassification.from_pretrained("models/optimum_model/fp16", device=device.value) - -.. parsed-literal:: - - Compiling the model to AUTO ... - - There are some models in the Hugging Face Models Hub, that are already converted and ready to run! You can filter those models out by library name, just type OpenVINO, or follow `this diff --git a/docs/notebooks/image-bind-with-output.rst b/docs/notebooks/image-bind-with-output.rst index 12321638ed19f8..1e8ecd63c1de0e 100644 --- a/docs/notebooks/image-bind-with-output.rst +++ b/docs/notebooks/image-bind-with-output.rst @@ -131,7 +131,7 @@ Prerequisites import platform - %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q datasets regex librosa soundfile pytorchvideo ftfy "timm>=0.6.7" einops fvcore "openvino>=2024.0.0" "nncf>=2.9.0" numpy scipy --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index f45c3ca5ec3b32..7bf7172f720588 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -54,21 +54,13 
@@ Guide =2023.1.0" "nncf>=2.6.0" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" "nncf>=2.6.0" torch torchvision tqdm "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -110,9 +102,9 @@ Model preparation stage has the following steps: Cloning into 'pytorch-cifar-models'... remote: Enumerating objects: 282, done. remote: Counting objects: 100% (281/281), done. - remote: Compressing objects: 100% (96/96), done. - remote: Total 282 (delta 135), reused 269 (delta 128), pack-reused 1 (from 1) - Receiving objects: 100% (282/282), 9.22 MiB | 18.95 MiB/s, done. + remote: Compressing objects: 100% (94/94), done. + remote: Total 282 (delta 135), reused 275 (delta 130), pack-reused 1 (from 1) + Receiving objects: 100% (282/282), 9.22 MiB | 9.58 MiB/s, done. Resolving deltas: 100% (135/135), done. @@ -184,7 +176,7 @@ Preprocessing for model obtained from training .. parsed-literal:: - 100%|██████████| 170498071/170498071 [00:06<00:00, 24572685.83it/s] + 100%|██████████| 170498071/170498071 [00:07<00:00, 22536051.32it/s] .. parsed-literal:: @@ -256,10 +248,10 @@ about supported parameters can be found on this .. parsed-literal:: - 2024-10-08 02:30:41.915322: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-10-08 02:30:41.946467: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:37:57.500572: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:37:57.532367: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:30:42.497931: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:37:58.074631: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -427,7 +419,7 @@ Tool `__ - `Interactive demo <#interactive-demo>`__ + Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -113,7 +114,7 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "openvino>=2023.3.0" opencv-python transformers "diffusers>=0.24.0" accelerate gdown "scikit-image>=0.19.2" "gradio>=4.19" "nncf>=2.9.0" "datasets>=2.14.6" "peft>=0.6.2" + %pip install -q "openvino>=2023.3.0" opencv-python transformers "diffusers>=0.24.0" "matplotlib>=3.4" accelerate gdown "scikit-image>=0.19.2" "gradio>=4.19" "nncf>=2.9.0" "datasets>=2.14.6" "peft>=0.6.2" Convert and prepare Face IdentityNet ------------------------------------ diff --git a/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst b/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst index 6c27e0431eea09..72a9c88e8ea0c3 100644 --- a/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst +++ b/docs/notebooks/instruct-pix2pix-image-editing-with-output.rst @@ -71,15 +71,8 @@ Install necessary packages .. code:: ipython3 - import platform - - %pip install -q "transformers>=4.25.1" torch accelerate "gradio>4.19" "datasets>=2.14.6" diffusers pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.25.1" torch accelerate "gradio>4.19" "datasets>=2.14.6" "matplotlib>=3.4" diffusers pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" Create Pytorch Models pipeline ------------------------------ diff --git a/docs/notebooks/internvl2-with-output.rst b/docs/notebooks/internvl2-with-output.rst index 038e397209cb6c..ed67209a0303eb 100644 --- a/docs/notebooks/internvl2-with-output.rst +++ b/docs/notebooks/internvl2-with-output.rst @@ -275,6 +275,16 @@ documentation self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: 
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -521,7 +531,7 @@ Let’s check model capabilities in answering questions about image: .. parsed-literal:: - The image displays a young red panda cub. 
This cute creature has a striking mix of dark reds and whites on its fur, with a striking white underbelly and back. The cub is sitting and peering forward with a curious expression. It appears to be peering through a wooden partition, and a piece of bamboo is visible in the bottom-left corner of the image. The background includes green foliage, suggesting a natural habitat for the cub. This cub is cute and looks like it's enjoying + The image shows a red panda lying on its side, partially wrapped in a wooden structure, possibly a container or log. The red panda appears to be looking at the camera with large, expressive eyes, displaying an endearing and lively appearance. The background consists of a portion of the red panda's habitat environment, which appears to be a tree and some greenery. Interactive demo ---------------- diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 5b61ee9af2d3e4..1cdb2e1d286245 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -77,7 +77,7 @@ Prerequisites .. code:: ipython3 %pip install -q "openvino>=2024.2.0" "datasets>=2.20" "nncf>=2.11.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" .. parsed-literal:: @@ -104,50 +104,14 @@ weights, using ``from_pretrained`` method. model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True) - -.. 
parsed-literal:: - - configuration_clip.py: 0%| | 0.00/11.7k [00:00=2024.0.0" "nncf>=2.11.0" "datasets>=2.20.0" - %pip install -q "transformers>=4.35" Pillow "gradio>=4.19" opencv-python + %pip install -q "transformers>=4.35" Pillow "gradio>=4.19" opencv-python "matplotlib>=3.4" %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision .. parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) + Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.3.1) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -152,10 +152,10 @@ example `__ .. parsed-literal:: - 2024-10-08 02:37:47.151795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:37:47.185561: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:44:54.753766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-05 01:44:54.788691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:37:47.732267: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:44:55.309895: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -374,11 +374,11 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -406,7 +406,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). 
If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -541,13 +541,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1389,9 +1389,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.691 seconds - Optimized pipeline: 1.193 seconds - Performance speed-up: 2.257 + FP32 pipeline: 2.746 seconds + Optimized pipeline: 1.140 seconds + Performance speed-up: 2.409 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 16b3efe503aea0..2310cb001b0c6b 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af0a33aba1728df2580f26f7ecf01774c2bc8084835df321d825197a945d775f -size 118088 +oid sha256:9ca596f09c0f6c0dafa4aca0fbe7974941301cfcbc6bcb3a8c4255774c347d0b +size 123320 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index c47f776f0af026..91289c35d7c60c 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebd6f6c9ee8fd3adb0dfbaebe7b41e7bdab039063a56ce21f0c6e01429d5ce6b -size 1151007 +oid sha256:56d06f7d654939feda627f67196b813de9b38a718acba9f5daed59a43314829f +size 1150807 diff --git 
a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 90a6e30cff3f2e..d98f56141b1252 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a3b5a0b0c94dec94a9aeb46ebb0ca6af8e63897684e446a299dc31b84851ce2 -size 1148928 +oid sha256:0d7f8506e5f1bd369debee273b45c601d05901af4937d8cc976f985cd4a81fed +size 1149292 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index 073bf8e86591fb..b53344f52b7396 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f41efc8140938f17d9ea5b31d699440fdcc3f3d407eba04abc4d694769f2529 -size 122071 +oid sha256:edd5a47baf47ae90532b47bc5ee05e8503b7d1deda59d956a354688ed949c8b5 +size 121605 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 5069e51978df61..2edc9a038ff8c3 
100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa756de5e754a64022b09b445f6851d8ce7373e09d5a776497bbf69513085cc8 -size 1150840 +oid sha256:aa184084b598dac717e99fe9677f1fe9dd4f6b85ec123c075d4109c75b134841 +size 1150675 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index f698389c6f8304..21ecfe511f1b76 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,10 +101,10 @@ Imports .. parsed-literal:: - 2024-10-08 02:44:29.489974: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:44:29.524217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 01:51:49.197259: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 01:51:49.231710: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-08 02:44:30.075180: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 01:51:49.783615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -211,7 +211,7 @@ PyTorch model formats are supported: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( @@ -247,7 +247,7 @@ tokenizer from HuggingFace. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. 
For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( @@ -505,9 +505,9 @@ Frames Per Second (FPS) for images. .. parsed-literal:: - PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.78 - IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.30 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 107.63 + PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.68 + IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.24 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.47 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -548,27 +548,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.82 ms + [ INFO ] Read model took 19.11 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 5.48 ms + [ INFO ] Reshape model took 5.55 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] - [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] 
/ [1,128] + [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 351.96 ms + [ INFO ] Compile model took 344.20 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -609,17 +609,17 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 20.92 ms + [ INFO ] First inference took 22.90 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 6334 iterations - [ INFO ] Duration: 120017.42 ms + [ INFO ] Count: 6485 iterations + [ INFO ] Duration: 120011.48 ms [ INFO ] Latency: - [ INFO ] Median: 18.69 ms - [ INFO ] Average: 18.85 ms - [ INFO ] Min: 17.15 ms - [ INFO ] Max: 26.62 ms - [ INFO ] Throughput: 52.78 FPS + [ INFO ] Median: 18.09 ms + [ INFO ] Average: 18.41 ms + [ INFO ] Min: 17.32 ms + [ INFO ] Max: 26.49 ms + [ INFO ] Throughput: 54.04 FPS .. code:: ipython3 @@ -646,27 +646,27 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.69 ms + [ INFO ] Read model took 24.93 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] - [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,?] + [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,?] 
[ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,?] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 7.39 ms + [ INFO ] Reshape model took 7.14 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] - [ INFO ] 63 , attention_mask (node: attention_mask) : i64 / [...] / [1,128] + [ INFO ] attention_mask , 63 (node: attention_mask) : i64 / [...] / [1,128] [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,128] [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1128.78 ms + [ INFO ] Compile model took 1080.21 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -707,15 +707,15 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 15.68 ms + [ INFO ] First inference took 16.00 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 12868 iterations - [ INFO ] Duration: 120000.02 ms + [ INFO ] Count: 13181 iterations + [ INFO ] Duration: 120003.10 ms [ INFO ] Latency: - [ INFO ] Median: 9.01 ms - [ INFO ] Average: 9.23 ms - [ INFO ] Min: 8.43 ms - [ INFO ] Max: 12.91 ms - [ INFO ] Throughput: 107.23 FPS + [ INFO ] Median: 8.93 ms + [ INFO ] Average: 9.01 ms + [ INFO ] Min: 7.68 ms + [ INFO ] Max: 12.00 ms + [ INFO ] Throughput: 109.84 FPS diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst index 348a243480aec1..a0bce9d85c7196 100644 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst +++ b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst @@ -9,7 +9,7 @@ and time required to generate an image for provided prompt. The notebook can be also used on other Intel hardware with minimal or no modifications. -|image0| +.. image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 Optimum Intel is an interface from Hugging Face between both diffusers and transformers libraries and various tools provided by Intel to @@ -48,8 +48,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 - Prerequisites ~~~~~~~~~~~~~ @@ -67,9 +65,7 @@ Install required packages .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + .. code:: ipython3 @@ -112,8 +108,12 @@ this .. 
parsed-literal:: - CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz - + CPU: Intel(R) Core(TM) Ultra 7 155H + GNA.GNA_SW: GNA_SW + GNA.GNA_HW: GNA_HW + GPU: Intel(R) Arc(TM) Graphics (iGPU) + NPU: Intel(R) AI Boost + Using full precision model in CPU with ``LatentConsistencyModelPipeline`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,14 +132,6 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models pipeline = LatentConsistencyModelPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") -.. parsed-literal:: - - 2024-10-08 02:50:26.200628: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:50:26.234856: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:50:26.890470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: @@ -172,6 +164,15 @@ https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models del pipeline gc.collect(); + + + +.. parsed-literal:: + + 345 + + + Select inference device for text-to-image generation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -190,15 +191,6 @@ Select inference device for text-to-image generation device - - - -.. 
parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - Running inference using Optimum Intel ``OVLatentConsistencyModelPipeline`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -228,99 +220,11 @@ and there is no need to do it manually ov_pipeline.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1) - -.. parsed-literal:: - - Framework not specified. Using pt to export the model. - Keyword arguments {'subfolder': '', 'trust_remote_code': False} are not expected by LatentConsistencyModelPipeline and will be ignored. - - - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00=0.24.0" "controlnet-aux>=0.0.6" "peft>=0.6.2" accelerate --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.2.0" pillow "gradio>=4.19" "datasets>=2.14.6" "nncf>=2.7.0" + %pip install -q "openvino>=2023.2.0" pillow "gradio>=4.19" "datasets>=2.14.6" "nncf>=2.7.0" "matplotlib>=3.4" Prepare PyTorch models diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst new file mode 100644 index 00000000000000..d035645fb27291 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output.rst @@ -0,0 +1,517 @@ +Visual-language assistant with LLaVA and OpenVINO Generative API +================================================================ + +`LLaVA `__ (Large Language and Vision +Assistant) is large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. 
+ +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces visual +instruction-tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use the LLaVA model to build a +multimodal chatbot using `OpenVINO +GenAI `__. For +demonstration purposes we will use the +`LLaVA-1.5-7B `__ model for conversion; +similar steps are required to run other models from the `LLaVA Model +Zoo `__. 
+ +- Install prerequisites +- Convert model to OpenVINO Intermediate Representation format using + Optimum Intel +- Compress model weights to 4 and 8 bits using NNCF +- Prepare OpenVINO GenAI inference pipeline +- Run OpenVINO model + + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize Model <#convert-and-optimize-model>`__ + + - `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ + - `Compress Model weights to 4 and 8 bits using + NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ + +- `Prepare OpenVINO GenAI inference + pipeline <#prepare-openvino-genai-inference-pipeline>`__ + + - `Select inference device <#select-inference-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + +- `Run model inference <#run-model-inference>`__ + + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +About model +----------- + + + +LLaVA connects a pre-trained `CLIP +ViT-L/14 `__ visual encoder and a large +language model such as Vicuna, LLaMa v2 or MPT, using a simple projection +matrix. + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +The model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End.
Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about model can be found in original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------- + + + +Install required dependencies + +.. code:: ipython3 + + from pathlib import Path + import requests + + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai + + + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) + +Convert and Optimize Model +-------------------------- + + + +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Convert model to OpenVINO format. 3. +Compress model weights using NNCF. + +Let’s consider each step more deeply. + +Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum. 
`Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides a command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using the +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4, +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + from cmd_helper import optimum_cli + + model_id = "llava-hf/llava-1.5-7b-hf" + model_path = Path(model_id.split("/")[-1]) / "FP16" + + if not model_path.exists(): + optimum_cli(model_id, model_path, additional_args={"weight-format": "fp16"}) + +Compress Model weights to 4 and 8 bits using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). 
LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') + + + +.. 
code:: ipython3 + + import shutil + import nncf + import openvino as ov + import gc + + core = ov.Core() + + + def compress_model_weights(precision): + int4_compression_config = { + "mode": nncf.CompressWeightsMode.INT4_ASYM, + "group_size": 128, + "ratio": 1, + } + int8_compression_config = {"mode": nncf.CompressWeightsMode.INT8_ASYM} + + compressed_model_path = model_path.parent / precision + + if not compressed_model_path.exists(): + ov_model = core.read_model(model_path / "openvino_language_model.xml") + compression_config = int4_compression_config if precision == "INT4" else int8_compression_config + compressed_ov_model = nncf.compress_weights(ov_model, **compression_config) + ov.save_model(compressed_ov_model, compressed_model_path / "openvino_language_model.xml") + del compressed_ov_model + del ov_model + gc.collect() + for file_name in model_path.glob("*"): + if file_name.name in ["openvino_language_model.xml", "openvino_language_model.bin"]: + continue + shutil.copy(file_name, compressed_model_path) + + + compress_model_weights(compression_mode.value) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + + +Prepare OpenVINO GenAI inference pipeline +----------------------------------------- + + + +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor +of OpenVINO™, aiming to simplify running inference of generative AI +models. It hides the complexity of the generation process and minimizes +the amount of code required. 
+ +Inference of visual language models can be implemented using the OpenVINO GenAI +``VLMPipeline`` class. Similarly to LLMPipeline, which we discussed in +this +`notebook `__, +it supports chat mode and preserves the conversation history inside the +pipeline, which allows us to effectively implement a chatbot that supports +conversation about the content of provided images. + +.. code:: ipython3 + + from openvino_genai import VLMPipeline, GenerationConfig + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget(exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Select model variant +~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + model_base_path = model_path.parent + available_models = [] + + for precision in ["INT4", "INT8", "FP16"]: + if (model_base_path / precision).exists(): + available_models.append(precision) + + model_variant = widgets.Dropdown( + options=available_models, + value=available_models[0], + description="Compression mode:", + disabled=False, + ) + + model_variant + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'FP16'), value='INT4') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ + + + +For pipeline initialization we should provide the path to the model directory +and the inference device. + +.. code:: ipython3 + + ov_model = VLMPipeline(str(model_base_path / model_variant.value), device=device.value) + +Run model inference +------------------- + + + +Now that we have the model and the generation pipeline defined, we can run +model inference. + +Prepare input data +~~~~~~~~~~~~~~~~~~ + + + +For preparing input data, ``VLMPipeline`` uses a tokenizer and image +processor internally; we just need to convert the image to an input OpenVINO tensor +and provide the question as a string. 
Additionally, we can provides options +for controlling generation process (e.g. number of maximum generated +tokens or using multinomial sampling for decoding instead of greedy +search approach) using ``GenerationConfig``. + +Generation process for long response may be time consuming, for +accessing partial result as soon as it is generated without waiting when +whole process finished, Streaming API can be used. Token streaming is +the mode in which the generative system returns the tokens one by one as +the model generates them. This enables showing progressive generations +to the user rather than waiting for the whole generation. Streaming is +an essential aspect of the end-user experience as it reduces latency, +one of the most critical aspects of a smooth experience. + +.. code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + import numpy as np + + config = GenerationConfig() + config.max_new_tokens = 100 + + + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + image_data = np.array(image.getdata()).reshape(1, 3, image.size[1], image.size[0]).astype(np.byte) + return image, ov.Tensor(image_data) + + + def streamer(subword: str) -> bool: + """ + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. + + """ + print(subword, end="", flush=True) + + + image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + + image, image_tensor = load_image(image_file) + text_message = "What is unusual on this image?" + + prompt = text_message + +Test model inference +~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + display(image) + print(f"Question:\n{text_message}") + print("Answer:") + output = ov_model.generate(prompt, image=image_tensor, generation_config=config, streamer=streamer) + + + +.. image:: llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png + + +.. parsed-literal:: + + Question: + What is unusual on this image? + Answer: + + The unusual aspect of this image is that a cat is lying inside a cardboard box. Cats are known for their curiosity and love for small, enclosed spaces. However, it is not a common sight to see a cat comfortably resting inside a cardboard box. + +Interactive demo +---------------- + + + +.. code:: ipython3 + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo_llava + + demo = make_demo_llava(ov_model) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git 
a/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-genai-with-output_files/llava-multimodal-chatbot-genai-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst new file mode 100644 index 00000000000000..ae14876b33b633 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst @@ -0,0 +1,515 @@ +Visual-language assistant with LLaVA and Optimum Intel OpenVINO integration +=========================================================================== + +`LLaVA `__ (Large Language and Vision +Assistant) is large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. + +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. 
On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces visual +instruction-tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use the LLaVA model to build a multimodal +chatbot using `Optimum +Intel `__. For +demonstration purposes we will use the +`LLaVA-1.5-7B `__ model for conversion; +similar steps are required to run other models from the `LLaVA Model +Zoo `__. + +The tutorial consists of the following steps: + +- Install prerequisites +- Convert model to OpenVINO Intermediate Representation format using + Optimum Intel +- Compress model weights to 4 and 8 bits using NNCF +- Prepare OpenVINO-based inference pipeline +- Run OpenVINO model + + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize Model <#convert-and-optimize-model>`__ + + - `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ + - `Compress Model weights to 4 and 8 bits using + NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ + +- `Prepare OpenVINO based inference + pipeline <#prepare-openvino-based-inference-pipeline>`__ +- `Run model inference <#run-model-inference>`__ + + - `Select inference device <#select-inference-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. 
+ +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +About model +----------- + + + +LLaVA connects a pre-trained `CLIP +ViT-L/14 `__ visual encoder and a large +language model such as Vicuna, LLaMa v2 or MPT, using a simple projection +matrix. + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +Model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End. Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about the model can be found in the original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------- + + + +Install required dependencies + +.. 
code:: ipython3 + + from pathlib import Path + import requests + + %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai + + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) + +Convert and Optimize Model +-------------------------- + + + +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Convert model to OpenVINO format. 3. +Compress model weights using NNCF. + +Let’s consider each step more deeply. + +Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. + +General command format: + +.. 
code:: bash + + optimum-cli export openvino --model --task + +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using the +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4, +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. + +.. code:: ipython3 + + from cmd_helper import optimum_cli + + model_id = "llava-hf/llava-1.5-7b-hf" + model_path = Path(model_id.split("/")[-1]) / "FP16" + + if not model_path.exists(): + optimum_cli(model_id, model_path, additional_args={"weight-format": "fp16"}) + +Compress Model weights to 4 and 8 bits using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF `__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. 
The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + compression_mode = widgets.Dropdown( + options=["INT4", "INT8"], + value="INT4", + description="Compression mode:", + disabled=False, + ) + + compression_mode + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') + + + +.. 
code:: ipython3 + + import shutil + import nncf + import openvino as ov + import gc + + core = ov.Core() + + + def compress_model_weights(precision): + int4_compression_config = {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 1, "all_layers": True} + int8_compression_config = {"mode": nncf.CompressWeightsMode.INT8_ASYM} + + compressed_model_path = model_path.parent / precision + + if not compressed_model_path.exists(): + ov_model = core.read_model(model_path / "openvino_language_model.xml") + compression_config = int4_compression_config if precision == "INT4" else int8_compression_config + compressed_ov_model = nncf.compress_weights(ov_model, **compression_config) + ov.save_model(compressed_ov_model, compressed_model_path / "openvino_language_model.xml") + del compressed_ov_model + del ov_model + gc.collect() + for file_name in model_path.glob("*"): + if file_name.name in ["openvino_language_model.xml", "openvino_language_model.bin"]: + continue + shutil.copy(file_name, compressed_model_path) + + + compress_model_weights(compression_mode.value) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + + +Prepare OpenVINO based inference pipeline +----------------------------------------- + + + +OpenVINO integration with Optimum Intel provides a ready-to-use API for +model inference that can be used for smooth integration with +transformers-based solutions. For loading the LLaVA model, we will use the +``OVModelForVisualCausalLM`` class, which has an interface compatible with the +Transformers LLaVA implementation. For loading a model, the +``from_pretrained`` method should be used. It accepts a path to the model +directory or a model_id from the HuggingFace hub (if the model is not converted to +OpenVINO format, conversion will be triggered automatically). 
+Additionally, we can provide an inference device, quantization config +(if model has not been quantized yet) and device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in +`documentation `__. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForVisualCausalLM + +Run model inference +------------------- + + + +Now that we have the model and the generation pipeline defined, we can run +model inference. + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget(exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Select model variant +~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + model_base_path = model_path.parent + available_models = [] + + for precision in ["INT4", "INT8", "FP16"]: + if (model_base_path / precision).exists(): + available_models.append(precision) + + model_variant = widgets.Dropdown( + options=available_models, + value=available_models[0], + description="Compression mode:", + disabled=False, + ) + + model_variant + + + + +.. parsed-literal:: + + Dropdown(description='Compression mode:', options=('INT4', 'FP16'), value='INT4') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + ov_model = OVModelForVisualCausalLM.from_pretrained(model_base_path / model_variant.value, device=device.value) + +Prepare input data +~~~~~~~~~~~~~~~~~~ + + + +For preparing input data, we will use the tokenizer and image processor +defined in the beginning of our tutorial. For alignment with the original +PyTorch implementation we will use PyTorch tensors as input. + +.. 
code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + from transformers import AutoProcessor, AutoConfig + + config = AutoConfig.from_pretrained(model_path) + + processor = AutoProcessor.from_pretrained( + model_path, patch_size=config.vision_config.patch_size, vision_feature_select_strategy=config.vision_feature_select_strategy + ) + + + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + return image + + + image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + text_message = "What is unusual on this image?" + + image = load_image(image_file) + + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": text_message}, + {"type": "image"}, + ], + }, + ] + + prompt = processor.apply_chat_template(conversation, add_generation_prompt=True) + + inputs = processor(images=image, text=prompt, return_tensors="pt") + +Test model inference +~~~~~~~~~~~~~~~~~~~~ + + + +Generation process for long response maybe time consuming, for accessing +partial result as soon as it is generated without waiting when whole +process finished, Streaming API can be used. Token streaming is the mode +in which the generative system returns the tokens one by one as the +model generates them. This enables showing progressive generations to +the user rather than waiting for the whole generation. Streaming is an +essential aspect of the end-user experience as it reduces latency, one +of the most critical aspects of a smooth experience. You can find more +details about how streaming work in `HuggingFace +documentation `__. 
+ +Also for simplification of preparing input in conversational mode, we +will use Conversation Template helper provided by model authors for +accumulating history of provided messages and images. + +.. code:: ipython3 + + from transformers import TextStreamer + + # Prepare + streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) + display(image) + print(f"Question: {text_message}") + print("Answer:") + + output_ids = ov_model.generate( + **inputs, + do_sample=False, + max_new_tokens=50, + streamer=streamer, + ) + + + +.. image:: llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png + + +.. parsed-literal:: + + Question: What is unusual on this image? + Answer: + The unusual aspect of this image is that a cat is lying inside a cardboard box, which is not a typical place for a cat to rest. Cats are known for their curiosity and love for small, enclosed spaces, but in this case + + +Interactive demo +---------------- + + + +.. 
code:: ipython3 + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + from gradio_helper import make_demo_llava_optimum + + demo = make_demo_llava_optimum(ov_model, processor) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output_files/llava-multimodal-chatbot-optimum-with-output_20_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output.rst 
b/docs/notebooks/llava-multimodal-chatbot-with-output.rst deleted file mode 100644 index fde37625041d43..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output.rst +++ /dev/null @@ -1,1342 +0,0 @@ -Visual-language assistant with LLaVA and OpenVINO -================================================= - -`LLaVA `__ (Large Language and Vision -Assistant) is large multimodal model that aims to develop a -general-purpose visual assistant that can follow both language and image -instructions to complete various real-world tasks. The idea is to -combine the power of large language models (LLMs) with vision encoders -like CLIP to create an end-to-end trained neural assistant that -understands and acts upon multimodal instructions. - -In the field of artificial intelligence, the goal is to create a -versatile assistant capable of understanding and executing tasks based -on both visual and language inputs. Current approaches often rely on -large vision models that solve tasks independently, with language only -used to describe image content. While effective, these models have fixed -interfaces with limited interactivity and adaptability to user -instructions. On the other hand, large language models (LLMs) have shown -promise as a universal interface for general-purpose assistants. By -explicitly representing various task instructions in language, these -models can be guided to switch and solve different tasks. To extend this -capability to the multimodal domain, the `LLaVA -paper `__ introduces \`visual -instruction-tuning, a novel approach to building a general-purpose -visual assistant. - -In this tutorial we consider how to use LLaVA model to build multimodal -chatbot. For demonstration purposes we will use -`LLaVA-Lightning-MPT-7B-preview `__ -model for conversion, similar steps required to run other models from -`LLaVA Model -Zoo `__. 
- -The tutorial consists from following steps: - -- Install prerequisites -- Prepare input processor and tokenizer -- Download original model -- Compress model weights to 4 and 8 bits using NNCF -- Convert model to OpenVINO Intermediate Representation (IR) format -- Prepare OpenVINO-based inference pipeline -- Run OpenVINO model - - -**Table of contents:** - - -- `About model <#about-model>`__ -- `Prerequisites <#prerequisites>`__ -- `Build model tokenizer and image - processor <#build-model-tokenizer-and-image-processor>`__ -- `Build model and convert it to OpenVINO IR - format <#build-model-and-convert-it-to-openvino-ir-format>`__ - - - `Prepare helpers for model - conversion <#prepare-helpers-for-model-conversion>`__ - - `Convert and Optimize Model <#convert-and-optimize-model>`__ - - - `Instantiate PyTorch model <#instantiate-pytorch-model>`__ - - `Compress Model weights to 4 and 8 bits using - NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ - - `Convert model to OpenVINO IR - format <#convert-model-to-openvino-ir-format>`__ - -- `Prepare OpenVINO based inference - pipeline <#prepare-openvino-based-inference-pipeline>`__ -- `Run model inference <#run-model-inference>`__ - - - `Select inference device <#select-inference-device>`__ - - `Load OpenVINO model <#load-openvino-model>`__ - - `Prepare input data <#prepare-input-data>`__ - - `Test model inference <#test-model-inference>`__ - -- `Interactive demo <#interactive-demo>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -About model ------------ - - - -LLaVA connects pre-trained `CLIP -ViT-L/14 `__ visual encoder and large -language model like Vicuna, LLaMa v2 or MPT, using a simple projection -matrix - -.. 
figure:: https://llava-vl.github.io/images/llava_arch.png - :alt: vlp_matrix.png - - vlp_matrix.png - -Model training procedure consists of 2 stages: - -- Stage 1: Pre-training for Feature Alignment. Only the projection - matrix is updated, based on a subset of CC3M. -- Stage 2: Fine-tuning End-to-End.. Both the projection matrix and LLM - are updated for two different use scenarios: - - - Visual Chat: LLaVA is fine-tuned on our generated multimodal - instruction-following data for daily user-oriented applications. - - Science QA: LLaVA is fine-tuned on this multimodal reasoning - dataset for the science domain. - -More details about model can be found in original `project -web-page `__, -`paper `__ and -`repo `__. - -Prerequisites -------------- - - - -Install required dependencies - -.. code:: ipython3 - - import sys - - %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.2.0" "nncf>=2.7.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.37.2" "gradio>=4.19" "einops" - -.. code:: ipython3 - - from pathlib import Path - - repo_dir = Path("LLaVA") - - if not repo_dir.exists(): - !git clone https://github.com/haotian-liu/LLaVA.git - - sys.path.insert(0, str(repo_dir.resolve())) - -Build model tokenizer and image processor ------------------------------------------ - - - -For starting work with model, we need understand how to prepare input -data first. As it is already discussed before, LLaVA is multimodal model -that accepts input user instructions in text format and image for -analysis. In the same time, LLaVA is combination of 2 fundamental -pretrained models for text and image processing, CLIP and MPT, each of -them has own approach for preparing data - tokenization for input text -and preprocessing for input image. 
LLaVA reuses these steps with small -adoption: introduced special tokens that serves for specification of -image location in the text that should be injected in provided user -instruction. - -.. code:: ipython3 - - from transformers import AutoTokenizer, AutoConfig, CLIPImageProcessor - from llava.model.language_model.llava_mpt import LlavaMptForCausalLM - - model_id = "liuhaotian/LLaVA-Lightning-MPT-7B-preview" - - config = AutoConfig.from_pretrained(model_id) - tokenizer = AutoTokenizer.from_pretrained(model_id) - image_processor = CLIPImageProcessor.from_pretrained(config.mm_vision_tower) - - -.. parsed-literal:: - - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - - -.. code:: ipython3 - - from llava.constants import ( - DEFAULT_IMAGE_PATCH_TOKEN, - DEFAULT_IM_START_TOKEN, - DEFAULT_IM_END_TOKEN, - DEFAULT_IMAGE_TOKEN, - ) - - mm_use_im_start_end = getattr(config, "mm_use_im_start_end", False) - mm_use_im_patch_token = getattr(config, "mm_use_im_patch_token", True) - if mm_use_im_patch_token: - tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) - if mm_use_im_start_end: - tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True) - - if hasattr(config, "max_sequence_length"): - context_len = config.max_sequence_length - else: - context_len = 2048 - -Build model and convert it to OpenVINO IR format ------------------------------------------------- - - - -LLaVA is autoregressive transformer generative model, it means that each -next model step depends from model output from previous step. The -generation approach is based on the assumption that the probability -distribution of a word sequence can be decomposed into the product of -conditional next word distributions. 
In other words, model predicts the -next token in the loop guided by previously generated tokens until the -stop-condition will be not reached (generated sequence of maximum length -or end of string token obtained). The way the next token will be -selected over predicted probabilities is driven by the selected decoding -methodology. You can find more information about the most popular -decoding methods in this -`blog `__. The entry point -for the generation process for models from the Hugging Face Transformers -library is the ``generate`` method. You can find more information about -its parameters and configuration in the -`documentation `__. -To preserve flexibility in the selection decoding methodology, we will -convert only model inference for one step. - -The inference flow has difference on first step and for the next. On the -first step, model accept preprocessed input instruction and image, that -transformed to the unified embedding space using ``token_embedding`` and -``image_encoder`` models, after that LLM-based part of model runs on -input embeddings to predict probability of next generated tokens. On the -next step, model accepts only next token id selected based on sampling -strategy and cached attention key and values. Since the output side is -auto-regressive, an output token hidden state remains the same once -computed for every further generation step. Therefore, recomputing it -every time you want to generate a new token seems wasteful. With the -cache, the model saves the hidden state once it has been computed. The -model only computes the one for the most recently generated output token -at each time step, re-using the saved ones for hidden tokens. This -reduces the generation complexity from :math:`O(n^3)` to :math:`O(n^2)` -for a transformer model. More details about how it works can be found in -this -`article `__. 
- -Prepare helpers for model conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -The code below prepares function for converting LLaVA model to OpenVINO -Intermediate Representation format. It splits model on parts described -above, prepare example inputs for each part and convert each part using -`OpenVINO Model Conversion -API `__. -``ov.convert_model`` function accepts PyTorch model instance and returns -``ov.Model`` object that represent model in OpenVINO format. It is ready -to use for loading on device using ``ov.compile_model`` or can be saved -on disk using ``ov.save_model``. - -.. code:: ipython3 - - from functools import wraps - import gc - import warnings - import torch - import openvino as ov - import nncf - from typing import Optional, Tuple, List - import torch.nn.functional as F - - warnings.filterwarnings("ignore") - - - class ModelWrapper(torch.nn.Module): - """ - Model wrapper class for export for spliting original forward logic on preparing multimodal data and inference using it. - That allows us to sperate image encoder and token embeddings model from general flow. 
- """ - - def __init__(self, model): - super().__init__() - self.model = model - - def forward( - self, - input_ids: torch.LongTensor = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - attention_mask: Optional[torch.Tensor] = None, - ): - outputs = self.model.transformer( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - past_key_values=past_key_values, - attention_mask=attention_mask, - return_dict=True, - output_attentions=False, - output_hidden_states=False, - use_cache=True, - ) - logits = F.linear( - outputs.last_hidden_state.to(self.model.transformer.wte.weight.device), - self.model.transformer.wte.weight.to(outputs.last_hidden_state.dtype), - ) - - return (logits, tuple(outputs.past_key_values)) - - - def patch_model_forward(model): - """ - Helper function for patching model forward for model with past. - It makes model more convinient for export to TorchScript format avoiding limitation - that list of tensors can not be correctly traced as model input - """ - - orig_forward = model.forward - - @wraps(orig_forward) - def ts_patched_forward( - input_ids: torch.Tensor, - past_key_values: Tuple[Tuple[torch.Tensor]], - attention_mask: torch.LongTensor, - ): - pkv_list = list(past_key_values) - outs = orig_forward( - input_ids=input_ids, - past_key_values=pkv_list, - attention_mask=attention_mask, - ) - return outs - - model.forward = ts_patched_forward - return model - - - def flattenize_inputs(inputs): - """ - Helper function for making nested inputs flattens - """ - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - 
torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def postprocess_converted_model( - ov_model, - example_input=None, - input_names=None, - output_names=None, - dynamic_shapes=None, - ): - """ - Helper function for appling postprocessing on converted model with updating input names, shapes and output names - acording to requested specification - """ - flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] - - if input_names: - for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): - input_node = m_input.get_node() - if input_node.element_type == ov.Type.dynamic: - m_input.get_node().set_element_type(ov.Type.f32) - shape = list(input_data.shape) - if dynamic_shapes is not None and inp_name in dynamic_shapes: - for k in dynamic_shapes[inp_name]: - shape[k] = -1 - input_node.set_partial_shape(ov.PartialShape(shape)) - m_input.get_tensor().set_names({inp_name}) - - if output_names: - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - ov_model.validate_nodes_and_infer_types() - return ov_model - - - def convert_llava_mpt( - pt_model: torch.nn.Module, - model_path: Path, - image_encoder_wc_parameters: Optional[dict] = None, - llava_wc_parameters: Optional[dict] = None, - ): - """ - LLaVA MPT model conversion function - - Params: - pt_model: PyTorch model - model_path: path for saving model - Returns: - None - """ - ov_out_path = Path(model_path) - pt_model.config.save_pretrained(ov_out_path) - pt_model.config.use_cache = True - pt_model.config.torchscript = True - first_stage_model_path = ov_out_path / "llava_input_embed.xml" - image_encoder_path = ov_out_path / "image_encoder.xml" - token_embedding_model_path = ov_out_path / "token_embed.xml" - second_stage_model_path = ov_out_path / "llava_with_past.xml" - if not image_encoder_path.exists(): - model.forward = 
model.encode_images - ov_model = ov.convert_model( - model, - example_input=torch.zeros((1, 3, 224, 224)), - input=[(-1, 3, 224, 224)], - ) - if image_encoder_wc_parameters is not None: - print("Applying weight compression to image encoder") - ov_model = nncf.compress_weights(ov_model, **image_encoder_wc_parameters) - ov.save_model(ov_model, image_encoder_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Image Encoder model successfully converted") - - if not token_embedding_model_path.exists(): - model.forward = model.get_model().embed_tokens - ov_model = ov.convert_model(model, example_input=torch.ones((1, 10), dtype=torch.long)) - ov.save_model(ov_model, token_embedding_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Token Embedding model successfully converted") - - if first_stage_model_path.exists() and second_stage_model_path.exists(): - print("LLaVA model successfully converted") - del pt_model - return - model_wrap = ModelWrapper(model) - example_input_first_stage = { - "inputs_embeds": torch.zeros((1, 307, 4096)), - "attention_mask": torch.ones((1, 307), dtype=torch.long), - } - outs = model_wrap(**example_input_first_stage) - inputs = ["input_ids"] - outputs = ["logits"] - dynamic_shapes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}} - for idx in range(len(outs[1])): - inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) - dynamic_shapes[inputs[-1]] = {2: "past_sequence + sequence"} - dynamic_shapes[inputs[-2]] = {2: "past_sequence + sequence"} - outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) - - inputs.extend(["attention_mask"]) - if not first_stage_model_path.exists(): - ov_model = ov.convert_model(model_wrap, example_input=example_input_first_stage) - ov_model = postprocess_converted_model(ov_model, output_names=outputs) - if llava_wc_parameters is not None: - print("Applying weight compression to first stage LLava model") - ov_model = 
nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, first_stage_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - if not second_stage_model_path.exists(): - model_wrap = patch_model_forward(model_wrap) - example_input_second_stage = { - "input_ids": torch.ones((1, 1), dtype=torch.long), - "past_key_values": outs[1], - "attention_mask": torch.ones((1, outs[1][-1][-1].shape[-2] + 1), dtype=torch.long), - } - ov_model = ov.convert_model(model_wrap, example_input=example_input_second_stage) - ov_model = postprocess_converted_model( - ov_model, - example_input=example_input_second_stage.values(), - input_names=inputs, - output_names=outputs, - dynamic_shapes=dynamic_shapes, - ) - if llava_wc_parameters is not None: - print("Applying weight compression to second stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, second_stage_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("LLaVA model successfully converted") - del model_wrap - del pt_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino - - -Convert and Optimize Model -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Our model conversion and optimization consist of following steps: 1. -Download original PyTorch model. 2. Compress model weights using NNCF 3. -Convert model to OpenVINO format and save it on disk. - -Let’s consider each step more deeply. - -Instantiate PyTorch model -^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -For creating PyTorch model we should use ``from_pretrained`` method of -``LlavaMPTForCausalLM`` model class. Model weights will be downloaded -from `HuggingFace hub `__ during first -run. It may takes some time and requires at least 13 Gb free space on -disk. 
- -Compress Model weights to 4 and 8 bits using NNCF -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -For reducing memory consumption, weights compression optimization can be -applied using `NNCF `__. Weight -compression aims to reduce the memory footprint of a model. It can also -lead to significant performance improvement for large memory-bound -models, such as Large Language Models (LLMs). LLMs and other models, -which require extensive memory to store the weights during inference, -can benefit from weight compression in the following ways: - -- enabling the inference of exceptionally large models that cannot be - accommodated in the memory of the device; - -- improving the inference performance of the models by reducing the - latency of the memory access when computing the operations with - weights, for example, Linear layers. - -`Neural Network Compression Framework -(NNCF) `__ provides 4-bit / -8-bit mixed weight quantization as a compression method primarily -designed to optimize LLMs. The main difference between weights -compression and full model quantization (post-training quantization) is -that activations remain floating-point in the case of weights -compression which leads to a better accuracy. Weight compression for -LLMs provides a solid inference performance improvement which is on par -with the performance of the full model quantization. In addition, weight -compression is data-free and does not require a calibration dataset, -making it easy to use. - -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. - -More details about weights compression, can be found in `OpenVINO -documentation `__. - - **Note**: There is no speedup for INT4 compressed models on dGPU. 
- -Convert model to OpenVINO IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Convert model to OpenVINO format using conversion helper function -defined above. - -Please select below whether you would like to run INT4 weight -compression instead of INT8 weight compression. - -.. code:: ipython3 - - import ipywidgets as widgets - - compression_mode = widgets.Dropdown( - options=["INT4", "INT8"], - value="INT4", - description="Compression mode:", - disabled=False, - ) - - compression_mode - - - - -.. parsed-literal:: - - Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') - - - -.. code:: ipython3 - - if compression_mode.value == "INT4": - compressed_model_dir = Path("llava-mpt/INT4_compressed_weights") - llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) - else: - compressed_model_dir = Path("llava-mpt/INT8_compressed_weights") - llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - - if not compressed_model_dir.exists(): - compressed_model_dir.mkdir(exist_ok=True, parents=True) - config.save_pretrained(compressed_model_dir) - model = LlavaMptForCausalLM.from_pretrained(model_id) - vision_tower = model.get_vision_tower() - if not vision_tower.is_loaded: - vision_tower.load_model() - - if mm_use_im_start_end: - model.resize_token_embeddings(len(tokenizer)) - - model.eval() - with torch.no_grad(): - convert_llava_mpt( - model, - compressed_model_dir, - image_encoder_wc_parameters=dict(mode=nncf.CompressWeightsMode.INT8), - llava_wc_parameters=llava_wc_parameters, - ) - del model - gc.collect(); - - - -.. parsed-literal:: - - Loading checkpoint shards: 0%| | 0/2 [00:00`__. - -.. 
code:: ipython3 - - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - import numpy as np - import torch - - - class OVLlavaMPTForCausalLM(GenerationMixin): - def __init__(self, core, model_dir, device): - self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) - self.token_embed = core.compile_model(model_dir / "token_embed.xml", device) - self.model = core.read_model(model_dir / "llava_with_past.xml") - self.model_input_embed = core.compile_model(model_dir / "llava_input_embed.xml", device) - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - compiled_model = core.compile_model(self.model, device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(model_dir) - self.generation_config = GenerationConfig.from_model_config(config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, prefix_mask, past_key_values) - - def forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - 
attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = {} - if past_key_values is not None: - # Flatten the past_key_values - attention_mask = torch.ones( - (input_ids.shape[0], past_key_values[-1][-1].shape[-2] + 1), - dtype=input_ids.dtype, - ) - past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - # Add the past_key_values to the decoder inputs - inputs = dict(zip(self.key_value_input_names, past_key_values)) - - else: - return self.forward_with_image(input_ids, images, attention_mask) - inputs["input_ids"] = np.array(input_ids) - - if "attention_mask" in self.input_names: - inputs["attention_mask"] = np.array(attention_mask) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def forward_with_image(self, input_ids, images, attention_mask): - """First step inference method, that resolves multimodal data""" - input_embed, attention_mask = self.prepare_multimodal_input(input_ids, images, attention_mask) - outs = self.model_input_embed([input_embed, attention_mask]) - logits = outs[0] - pkv = list(outs.values())[1:] - pkv = tuple(pkv[i : i + self.num_pkv] for 
i in range(0, len(pkv), self.num_pkv)) - return CausalLMOutputWithPast(logits=torch.from_numpy(logits), past_key_values=pkv) - - def prepare_multimodal_input(self, input_ids, images, attention_mask): - """Preprocessing function for embedding multimodal data""" - image_features = [] - if images is not None: - image_features = self.image_encoder(images)[0] - - new_input_embeds = [] - cur_image_idx = 0 - for batch_idx, cur_input_ids in enumerate(input_ids): - if (cur_input_ids == IMAGE_TOKEN_INDEX).sum() == 0: - # multimodal LLM, but the current sample is not multimodal - cur_input_embeds = torch.from_numpy(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - new_input_embeds.append(cur_input_embeds) - cur_image_idx += 1 - continue - image_token_indices = torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0] - cur_new_input_embeds = [] - while image_token_indices.numel() > 0: - cur_image_features = image_features[cur_image_idx] - image_token_start = image_token_indices[0] - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - embd = self.token_embed(cur_input_ids[: image_token_start - 1].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - embd = self.token_embed(cur_input_ids[image_token_start - 1 : image_token_start].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - cur_new_input_embeds.append(cur_image_features) - embd = self.token_embed(cur_input_ids[image_token_start + 1 : image_token_start + 2].unsqueeze(0))[0][0] - cur_new_input_embeds.append(embd) - else: - cur_new_input_embeds.append(self.token_embed(cur_input_ids[:image_token_start].unsqueeze(0))[0][0]) - cur_new_input_embeds.append(cur_image_features) - cur_image_idx += 1 - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - cur_input_ids = cur_input_ids[image_token_start + 2 :] - else: - cur_input_ids = cur_input_ids[image_token_start + 1 :] - image_token_indices = 
torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0] - if cur_input_ids.numel() > 0: - if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False): - cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - else: - cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) - cur_new_input_embeds = [torch.from_numpy(x) for x in cur_new_input_embeds] - cur_new_input_embeds = torch.cat(cur_new_input_embeds, dim=0) - new_input_embeds.append(cur_new_input_embeds) - - if any(x.shape != new_input_embeds[0].shape for x in new_input_embeds): - max_len = max(x.shape[0] for x in new_input_embeds) - - new_input_embeds_align = [] - for cur_new_embed in new_input_embeds: - cur_new_embed = torch.cat( - ( - cur_new_embed, - torch.zeros( - (max_len - cur_new_embed.shape[0], cur_new_embed.shape[1]), - dtype=cur_new_embed.dtype, - ), - ), - dim=0, - ) - new_input_embeds_align.append(cur_new_embed) - new_input_embeds = torch.stack(new_input_embeds_align, dim=0) - - if attention_mask is not None: - new_attention_mask = [] - for cur_attention_mask, cur_new_labels, cur_new_labels_align in zip(attention_mask, _new_labels, new_labels): - new_attn_mask_pad_left = torch.full( - (cur_new_labels.shape[0] - labels.shape[1],), - True, - dtype=attention_mask.dtype, - ) - new_attn_mask_pad_right = torch.full( - (cur_new_labels_align.shape[0] - cur_new_labels.shape[0],), - False, - dtype=attention_mask.dtype, - ) - cur_new_attention_mask = torch.cat( - ( - new_attn_mask_pad_left, - cur_attention_mask, - new_attn_mask_pad_right, - ), - dim=0, - ) - new_attention_mask.append(cur_new_attention_mask) - attention_mask = torch.stack(new_attention_mask, dim=0) - assert attention_mask.shape == new_labels.shape - else: - new_input_embeds = torch.stack(new_input_embeds, dim=0) - - if attention_mask is not None: - new_attn_mask_pad_left = torch.full( - ( - attention_mask.shape[0], - new_input_embeds.shape[1] - 
input_ids.shape[1], - ), - True, - dtype=attention_mask.dtype, - ) - attention_mask = torch.cat((new_attn_mask_pad_left, attention_mask), dim=1) - assert attention_mask.shape == new_input_embeds.shape[:2] - - return new_input_embeds, attention_mask - - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): - """ - This function is used during running GenerationMixin.generate for preparing model specific inputs for - each generation step - """ - past_len = 0 - if past_key_values is not None: - input_ids = input_ids[:, -1].unsqueeze(-1) - past_len = past_key_values[-1][-1].shape[-2] - attention_mask = kwargs.get( - "attention_mask", - torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), - ) - if not kwargs.get("use_cache", True): - raise NotImplementedError("MPT with prefix_lm=True does not support use_cache=False.") - else: - prefix_mask = None - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "prefix_mask": prefix_mask, - "past_key_values": past_key_values, - "images": kwargs.get("images", None), - } - - def _reorder_cache(self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or - [`~PreTrainedModel.beam_sample`] is called. - This is required to match `past_key_values` with the correct beam_idx at every generation step. - """ - - # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache - return tuple(tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) for layer_past in past_key_values) - -Run model inference -------------------- - - - -Now, when we have model and defined generation pipeline, we can run -model inference. - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -Select device from dropdown list for running inference using OpenVINO. 
- - **Note**: There is no speedup for INT4 compressed models on dGPU. - -.. code:: ipython3 - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import device_widget - - device = device_widget(exclude=["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') - - - -Load OpenVINO model -~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - core = ov.Core() - - ov_model = OVLlavaMPTForCausalLM(core, compressed_model_dir, device.value) - -Prepare input data -~~~~~~~~~~~~~~~~~~ - - - -For preparing input data, we will use tokenizer and image processor -defined in the begging of our tutorial. For alignment with original -PyTorch implementation we will use PyTorch tensors as input. - -.. code:: ipython3 - - import requests - from PIL import Image - from io import BytesIO - - - def load_image(image_file): - if image_file.startswith("http") or image_file.startswith("https"): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert("RGB") - else: - image = Image.open(image_file).convert("RGB") - return image - - - image_file = "https://llava-vl.github.io/static/images/view.jpg" - - image = load_image(image_file) - image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] - - text_message = "What are the things I should be cautious about when I visit here?" - print(f"Question: {text_message}") - image - - -.. parsed-literal:: - - Question: What are the things I should be cautious about when I visit here? - - - - -.. 
image:: llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png - - - -Test model inference -~~~~~~~~~~~~~~~~~~~~ - - - -Generation process for long response maybe time consuming, for accessing -partial result as soon as it is generated without waiting when whole -process finished, Streaming API can be used. Token streaming is the mode -in which the generative system returns the tokens one by one as the -model generates them. This enables showing progressive generations to -the user rather than waiting for the whole generation. Streaming is an -essential aspect of the end-user experience as it reduces latency, one -of the most critical aspects of a smooth experience. You can find more -details about how streaming work in `HuggingFace -documentation `__. - -Also for simplification of preparing input in conversational mode, we -will use Conversation Template helper provided by model authors for -accumulating history of provided messages and images. - -.. code:: ipython3 - - from llava.mm_utils import tokenizer_image_token, KeywordsStoppingCriteria - from llava.constants import IMAGE_TOKEN_INDEX - from transformers import TextStreamer - from llava.conversation import conv_templates, SeparatorStyle - - # Prepare - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - conv_mode = "mpt" - - conv = conv_templates[conv_mode].copy() - roles = ("user", "assistant") - - if mm_use_im_start_end: - inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + text_message - else: - inp = DEFAULT_IMAGE_TOKEN + "\n" + text_message - conv.append_message(conv.roles[0], inp) - conv.append_message(conv.roles[1], None) - - prompt = conv.get_prompt() - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = 
KeywordsStoppingCriteria(keywords, tokenizer, input_ids) - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - print("Answer:") - - output_ids = ov_model.generate( - input_ids, - images=image_tensor, - do_sample=True, - temperature=0.2, - max_new_tokens=1024, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - - -.. parsed-literal:: - - Answer: - When visiting this location, I should be cautious about the water level and the presence of boats. The image shows a dock with a boat in the water, and the water appears to be relatively shallow. It is essential to be mindful of the water depth when approaching the dock, as it could be dangerous to step into the water without checking the water level. Additionally, I should be aware of the boats in the water, as they could pose a risk if they are not properly secured or if they are not being used as intended. It is crucial to maintain a safe distance from the boats and follow any posted signs or guidelines to ensure a safe and enjoyable experience. - - -Interactive demo ----------------- - - - -.. 
code:: ipython3 - - from threading import Event, Thread - from transformers import TextIteratorStreamer - - conv = conv_templates[conv_mode].copy() - conv.messages = [] - - - def clear_history(textbox, imagebox, chatbot): - """ - callback function for clearing chat windows in interface on clear button click - - Params: - textbox: current textbox for user messages state - imagebox: current imagebox state - chatbot: current chatbot state - Returns: - empty textbox, imagebox and chatbot states - """ - conv.messages = [] - - return None, None, None - - - def handle_user_message(message, history): - """ - callback function for updating user messages in interface on submit button click - - Params: - message: current message - history: conversation history - Returns: - updated message and conversation history - """ - # Append the user's message to the conversation history - return "", history + [[message, ""]] - - - def run_chatbot(image, history, temperature=0.2, top_p=0.7, max_new_tokens=1024): - """ - callback function for running chatbot on submit button click - - Params: - history: conversation history - temperature: parameter for control the level of creativity in AI-generated text. - By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. - top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. 
- - """ - - text = history[-1][0] - if len(text) <= 0 and image is None: - conv.skip_next = True - yield history - text = text[:1536] # Hard cut-off - if image is not None: - text = text[:1200] # Hard cut-off for images - if "" not in text: - text = text + "\n" - text = (text, image, "Resize") - conv.append_message(conv.roles[0], text) - conv.append_message(conv.roles[1], None) - conv.skip_next = False - - # Construct the input message string for the model by concatenating the current system message and conversation history - prompt = conv.get_prompt() - image = conv.get_images(return_pil=True) - if not image: - image_tensor = None - else: - image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) - # Tokenize the messages string - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - generate_kwargs = dict( - input_ids=input_ids, - images=image_tensor, - max_new_tokens=max_new_tokens, - temperature=temperature, - do_sample=temperature > 0.001, - top_p=top_p, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - - stream_complete = Event() - - def generate_and_signal_complete(): - """ - genration function for single thread - """ - ov_model.generate(**generate_kwargs) - stream_complete.set() - - t1 = Thread(target=generate_and_signal_complete) - t1.start() - - # Initialize an empty string to store the generated text - partial_text = "" - for new_text in streamer: - if not new_text: - continue - partial_text += new_text - conv.messages[-1][-1] = partial_text - history[-1][1] = partial_text - yield history - -.. 
code:: ipython3 - - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llava-multimodal-chatbot/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - from gradio_helper import make_demo_llava - - demo = make_demo_llava(handle_user_message=handle_user_message, run_chatbot=run_chatbot, clear_history=clear_history) - - try: - demo.queue(max_size=2).launch(debug=False) - except Exception: - demo.queue(max_size=2).launch(share=True, debug=False) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - -.. code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg b/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg deleted file mode 100644 index 29fc338b516a09..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f825c10443339b42cb5e2415f48bb7bafb4e087fb29bce6d2feaf3c2f89788c8 -size 72374 diff --git a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png b/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png deleted file mode 100644 index c1062ffb3d6d10..00000000000000 --- a/docs/notebooks/llava-multimodal-chatbot-with-output_files/llava-multimodal-chatbot-with-output_20_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dde262e54da6d8dad5062989d7863db7cd85ac0403b9015a76f5884472f67ceb -size 599941 diff 
--git a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst index b903b7d5081d94..07f84987dca33e 100644 --- a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst +++ b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst @@ -70,6 +70,8 @@ Prerequisites .. code:: ipython3 import os + from pathlib import Path + import requests os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" @@ -85,6 +87,17 @@ Prerequisites "git+https://github.com/huggingface/optimum-intel.git" \ "git+https://github.com/openvinotoolkit/nncf.git" + utility_files = ["notebook_utils.py", "cmd_helper.py"] + + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) + Create a Function calling agent ------------------------------- @@ -179,12 +192,13 @@ folder. .. code:: ipython3 from pathlib import Path + from cmd_helper import optimum_cli model_id = "Qwen/Qwen2-7B-Instruct" model_path = "Qwen2-7B-Instruct-ov" if not Path(model_path).exists(): - !optimum-cli export openvino --model {model_id} --task text-generation-with-past --trust-remote-code --weight-format int4 --ratio 0.72 {model_path} + optimum_cli(model_id, model_path, additional_args={"task": "text-generation-with-past", "trust-remote-code": "", "weight-format": "int4", "ratio": "0.72"}) Select inference device for LLM ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -193,23 +207,9 @@ Select inference device for LLM .. 
code:: ipython3 - import openvino as ov - import ipywidgets as widgets - - core = ov.Core() - - support_devices = core.available_devices - if "NPU" in support_devices: - support_devices.remove("NPU") - - device = widgets.Dropdown( - options=support_devices + ["AUTO"], - value="CPU", - description="Device:", - disabled=False, - ) + from notebook_utils import device_widget - device + device = device_widget("CPU", ["NPU"]) diff --git a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst index 7151d7b4341443..6aa437b9f2d37a 100644 --- a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst +++ b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst @@ -82,17 +82,18 @@ Install required dependencies import os import requests + from pathlib import Path + utility_files = ["notebook_utils.py", "cmd_helper.py", "pip_helper.py"] - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", - ) - open("pip_helper.py", "w").write(r.text) + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" @@ -220,6 +221,7 @@ code: from pathlib import Path import huggingface_hub as hf_hub + from cmd_helper import optimum_cli llm_model_path = llm_model_id.value.split("/")[-1] repo_name = llm_model_id.value.split("/")[0] @@ -228,7 +230,7 @@ code: if repo_name == "OpenVINO": hf_hub.snapshot_download(llm_model_id.value, local_dir=llm_model_path) else: - !optimum-cli export openvino --model {llm_model_id.value} --task 
text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 0.8 {llm_model_path} + optimum_cli(llm_model_id.value, llm_model_path, additional_args={"task": "text-generation-with-past", "weight-format": "int4"}) Download Embedding model ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -249,7 +251,7 @@ example. embedding_model_path = "bge-small-en-v1.5" if not Path(embedding_model_path).exists(): - !optimum-cli export openvino --model {embedding_model_id} --task feature-extraction {embedding_model_path} + optimum_cli(embedding_model_id, embedding_model_path, additional_args={"task": "feature-extraction"}) Create models ------------- diff --git a/docs/notebooks/llm-agent-react-with-output.rst b/docs/notebooks/llm-agent-react-with-output.rst new file mode 100644 index 00000000000000..653b57a491dbf2 --- /dev/null +++ b/docs/notebooks/llm-agent-react-with-output.rst @@ -0,0 +1,561 @@ +Create a native Agent with OpenVINO +=================================== + +LLMs are limited to the knowledge on which they have been trained and the +additional knowledge provided as context, as a result, if a useful piece +of information is missing from the provided knowledge, the model cannot “go +around” and try to find it in other sources. This is the reason why we +need to introduce the concept of Agents. + +The core idea of agents is to use a language model to choose a sequence +of actions to take. In agents, a language model is used as a reasoning +engine to determine which actions to take and in which order. Agents can +be seen as applications powered by LLMs and integrated with a set of +tools like search engines, databases, websites, and so on. Within an +agent, the LLM is the reasoning engine that, based on the user input, is +able to plan and execute a set of actions that are needed to fulfill the +request. + +..
figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/22fa5396-8381-400f-a78f-97e25d57d807 + :alt: agent + + agent + +This example will demonstrate how to create a native agent with +OpenVINO. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Create LLM as agent <#create-llm-as-agent>`__ + + - `Download LLM <#download-llm>`__ + - `Select inference device for + LLM <#select-inference-device-for-llm>`__ + - `Instantiate LLM using Optimum + Intel <#instantiate-llm-using-optimum-intel>`__ + - `Create text generation method <#create-text-generation-method>`__ + +- `Create prompt template <#create-prompt-template>`__ +- `Create parser <#create-parser>`__ +- `Create tools calling <#create-tools-calling>`__ +- `Run agent <#run-agent>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +..
code:: ipython3 + + import os + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", + ) + open("pip_helper.py", "w").write(r.text) + + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + + from pip_helper import pip_install + + pip_install( + "-q", + "--extra-index-url", + "https://download.pytorch.org/whl/cpu", + "transformers>=4.43.1", + ) + pip_install("-q", "git+https://github.com/huggingface/optimum-intel.git", "git+https://github.com/openvinotoolkit/nncf.git", "datasets", "accelerate") + pip_install("--pre", "-Uq", "openvino>=2024.4.0", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") + +Create LLM as agent +------------------- + + + +Download LLM +~~~~~~~~~~~~ + + + +To run LLM locally, we have to download the model in the first step. It +is possible to `export your +model `__ +to the OpenVINO IR format with the CLI, and load the model from local +folder. + +Large Language Models (LLMs) are a core component of agent. LlamaIndex +does not serve its own LLMs, but rather provides a standard interface +for interacting with many different LLMs. In this example, we can select +``Qwen2.5`` as LLM in agent pipeline. + +* **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - + Qwen2.5 is the latest series of Qwen large language models. Comparing + with Qwen2, Qwen2.5 series brings significant improvements in coding, + mathematics and general knowledge skills. Additionally, it brings + long-context and multiple languages support including Chinese, English, + French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, + Vietnamese, Thai, Arabic, and more. 
For more details, please refer to + `model_card `__, + `blog `__, + `GitHub `__, and + `Documentation `__. + +.. code:: ipython3 + + import ipywidgets as widgets + + llm_model_ids = ["Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/qwen2.5-14b-instruct"] + + llm_model_id = widgets.Dropdown( + options=llm_model_ids, + value=llm_model_ids[0], + description="Model:", + disabled=False, + ) + + llm_model_id + + + + +.. parsed-literal:: + + Dropdown(description='Model:', options=('Qwen/Qwen2.5-3B-Instruct', 'Qwen/Qwen2.5-7B-Instruct', 'Qwen/qwen2.5-… + + + +.. code:: ipython3 + + from pathlib import Path + + llm_model_path = llm_model_id.value.split("/")[-1] + + if not Path(llm_model_path).exists(): + !optimum-cli export openvino --model {llm_model_id.value} --task text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 1.0 --sym {llm_model_path} + +Select inference device for LLM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + llm_device = device_widget("CPU", exclude=["NPU"]) + + llm_device + + +.. parsed-literal:: + + [ERROR] 20:00:52.380 [NPUBackends] Cannot find backend for inference. Make sure the device is available. + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') + + + +Instantiate LLM using Optimum Intel +----------------------------------- + + + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ and +create pipelines to run an inference with OpenVINO Runtime using Hugging +Face APIs. The Optimum Inference models are API compatible with Hugging +Face Transformers models. This means we just need to replace +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +Below is an example of the RedPajama model + +.. 
code:: diff + + -from transformers import AutoModelForCausalLM + +from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline + + model_id = "togethercomputer/RedPajama-INCITE-Chat-3B-v1" + -model = AutoModelForCausalLM.from_pretrained(model_id) + +model = OVModelForCausalLM.from_pretrained(model_id, export=True) + +Model class initialization starts with calling ``from_pretrained`` +method. When downloading and converting Transformers model, the +parameter ``export=True`` should be added (as we already converted model +before, we do not need to provide this parameter). We can save the +converted model for the next usage with the ``save_pretrained`` method. +Tokenizer class and pipelines API are compatible with Optimum models. + +You can find more details about OpenVINO LLM inference using HuggingFace +Optimum API in `LLM inference +guide `__. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, AutoConfig, TextStreamer + from transformers.generation import ( + StoppingCriteriaList, + StoppingCriteria, + ) + import openvino.properties as props + import openvino.properties.hint as hints + import openvino.properties.streams as streams + + import json + import json5 + import torch + + tokenizer = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True) + + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} + + llm = OVModelForCausalLM.from_pretrained( + llm_model_path, + device=llm_device.value, + ov_config=ov_config, + config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True), + trust_remote_code=True, + ) + + llm.generation_config.top_k = 1 + llm.generation_config.max_length = 2000 + +Create text generation method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +In this example, we would like to stream the output text through +``TextStreamer``, and stop text generation before
``Observation`` +received from tool calling.. + +.. code:: ipython3 + + class StopSequenceCriteria(StoppingCriteria): + """ + This class can be used to stop generation whenever a sequence of tokens is encountered. + + Args: + stop_sequences (`str` or `List[str]`): + The sequence (or list of sequences) on which to stop execution. + tokenizer: + The tokenizer used to decode the model outputs. + """ + + def __init__(self, stop_sequences, tokenizer): + if isinstance(stop_sequences, str): + stop_sequences = [stop_sequences] + self.stop_sequences = stop_sequences + self.tokenizer = tokenizer + + def __call__(self, input_ids, scores, **kwargs) -> bool: + decoded_output = self.tokenizer.decode(input_ids.tolist()[0]) + return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences) + + + def text_completion(prompt: str, stop_words) -> str: + im_end = "<|im_end|>" + if im_end not in stop_words: + stop_words = stop_words + [im_end] + streamer = TextStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) + + stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(stop_words, tokenizer)]) + input_ids = torch.tensor([tokenizer.encode(prompt)]) + generate_kwargs = dict( + input_ids=input_ids, + streamer=streamer, + stopping_criteria=stopping_criteria, + ) + output = llm.generate(**generate_kwargs) + output = output.tolist()[0] + output = tokenizer.decode(output, errors="ignore") + assert output.startswith(prompt) + output = output[len(prompt) :].replace("<|endoftext|>", "").replace(im_end, "") + + for stop_str in stop_words: + idx = output.find(stop_str) + if idx != -1: + output = output[: idx + len(stop_str)] + return output + +Create prompt template +---------------------- + + + +A prompt for a language model is a set of instructions or input provided +by a user to guide the model’s response, helping it understand the +context and generate relevant and coherent language-based output, such +as answering questions, 
completing sentences, or engaging in a +conversation. + +Different agents have different prompting styles for reasoning. In this +example, we will use `ReAct agent `__ with +its typical prompt template. For a full list of built-in agents see +`agent +types `__. + +.. figure:: https://github.com/user-attachments/assets/c26432c2-3cf1-4942-ae03-fd8e8ebb4509 + :alt: react + + react + +A ReAct prompt consists of few-shot task-solving trajectories, with +human-written text reasoning traces and actions, as well as environment +observations in response to actions. ReAct prompting is intuitive and +flexible to design, and achieves state-of-the-art few-shot performances +across a variety of tasks, from question answering to online shopping! + +In a prompt template for the agent, ``query`` is the user’s query and the other +parameters should be a sequence of messages that contain the +``descriptions`` and ``parameters`` of the agent tools. + +.. code:: ipython3 + + TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" + + PROMPT_REACT = """Answer the following questions as best you can. You have access to the following APIs: + + {tools_text} + + Use the following format: + + Question: the input question you must answer + Thought: you should always think about what to do + Action: the action to take, should be one of [{tools_name_text}] + Action Input: the input to the action + Observation: the result of the action + ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) + Thought: I now know the final answer + Final Answer: the final answer to the original input question + + Begin! + + Question: {query}""" + +Meanwhile, we have to create a function to consolidate the tools +information and conversation history into the prompt template. + +..
code:: ipython3 + + def build_input_text(chat_history, list_of_tool_info) -> str: + tools_text = [] + for tool_info in list_of_tool_info: + tool = TOOL_DESC.format( + name_for_model=tool_info["name_for_model"], + name_for_human=tool_info["name_for_human"], + description_for_model=tool_info["description_for_model"], + parameters=json.dumps(tool_info["parameters"], ensure_ascii=False), + ) + if tool_info.get("args_format", "json") == "json": + tool += " Format the arguments as a JSON object." + elif tool_info["args_format"] == "code": + tool += " Enclose the code within triple backticks (`) at the beginning and end of the code." + else: + raise NotImplementedError + tools_text.append(tool) + tools_text = "\n\n".join(tools_text) + + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) + + messages = [{"role": "system", "content": "You are a helpful assistant."}] + for i, (query, response) in enumerate(chat_history): + if list_of_tool_info: + if (len(chat_history) == 1) or (i == len(chat_history) - 2): + query = PROMPT_REACT.format( + tools_text=tools_text, + tools_name_text=tools_name_text, + query=query, + ) + if query: + messages.append({"role": "user", "content": query}) + if response: + messages.append({"role": "assistant", "content": response}) + + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False, return_tensors="pt") + + return prompt + +Create parser +------------- + + + +A Parser is used to convert raw output of LLM to the input arguments of +tools. + +.. code:: ipython3 + + def parse_latest_tool_call(text): + tool_name, tool_args = "", "" + i = text.rfind("\nAction:") + j = text.rfind("\nAction Input:") + k = text.rfind("\nObservation:") + if 0 <= i < j: # If the text has `Action` and `Action input`, + if k < j: # but does not contain `Observation`, + # then it is likely that `Observation` is ommited by the LLM, + # because the output text may have discarded the stop word. 
+ text = text.rstrip() + "\nObservation:" # Add it back. + k = text.rfind("\nObservation:") + tool_name = text[i + len("\nAction:") : j].strip() + tool_args = text[j + len("\nAction Input:") : k].strip() + text = text[:k] + return tool_name, tool_args, text + +Create tools calling +-------------------- + + + +In this example, we will create 2 customized tools for +``image generation`` and ``weather query``. A detailed description of +these tools should be defined in JSON format, which will be used as part +of the prompt. + +.. code:: ipython3 + + tools = [ + { + "name_for_human": "get weather", + "name_for_model": "get_weather", + "description_for_model": 'Get the current weather in a given city name."', + "parameters": [ + { + "name": "city_name", + "description": "City name", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + { + "name_for_human": "image generation", + "name_for_model": "image_gen", + "description_for_model": "AI painting (image generation) service, input text description, and return the image URL drawn based on text information.", + "parameters": [ + { + "name": "prompt", + "description": "describe the image", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + ] + +Then we should implement these tools with inputs and outputs, and +execute them according to the output of LLM. + +..
code:: ipython3 + + def call_tool(tool_name: str, tool_args: str) -> str: + if tool_name == "get_weather": + city_name = json5.loads(tool_args)["city_name"] + key_selection = { + "current_condition": [ + "temp_C", + "FeelsLikeC", + "humidity", + "weatherDesc", + "observation_time", + ], + } + resp = requests.get(f"https://wttr.in/{city_name}?format=j1") + resp.raise_for_status() + resp = resp.json() + ret = {k: {_v: resp[k][0][_v] for _v in v} for k, v in key_selection.items()} + return str(ret) + elif tool_name == "image_gen": + import urllib.parse + + tool_args = tool_args.replace("(", "").replace(")", "") + prompt = json5.loads(tool_args)["prompt"] + prompt = urllib.parse.quote(prompt) + return json.dumps( + {"image_url": f"https://image.pollinations.ai/prompt/{prompt}"}, + ensure_ascii=False, + ) + else: + raise NotImplementedError + + + def llm_with_tool(prompt: str, history, list_of_tool_info=()): + chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] + + planning_prompt = build_input_text(chat_history, list_of_tool_info) + text = "" + while True: + output = text_completion(planning_prompt + text, stop_words=["Observation:", "Observation:\n"]) + action, action_input, output = parse_latest_tool_call(output) + if action: + observation = call_tool(action, action_input) + output += f"\nObservation: = {observation}\nThought:" + observation = f"{observation}\nThought:" + print(observation) + text += output + else: + text += output + break + + new_history = [] + new_history.extend(history) + new_history.append({"user": prompt, "bot": text}) + return text, new_history + +Run agent +--------- + + + +.. code:: ipython3 + + history = [] + query = "get the weather in London, and create a picture of Big Ben based on the weather information" + + response, history = llm_with_tool(prompt=query, history=history, list_of_tool_info=tools) + + +.. parsed-literal:: + + Thought: First, I need to use the get_weather API to get the current weather in London. 
+ Action: get_weather + Action Input: {"city_name": "London"} + Observation: + {'current_condition': {'temp_C': '11', 'FeelsLikeC': '10', 'humidity': '94', 'weatherDesc': [{'value': 'Overcast'}], 'observation_time': '12:23 AM'}} + Thought: + Now that I have the weather information, I will use the image_gen API to generate an image of Big Ben based on the weather conditions. + Action: image_gen + Action Input: {"prompt": "Big Ben under overcast sky with temperature 11°C and humidity 94%"} + Observation: + {"image_url": "https://image.pollinations.ai/prompt/Big%20Ben%20under%20overcast%20sky%20with%20temperature%2011%C2%B0C%20and%20humidity%2094%25"} + Thought: + The image has been generated successfully. + Final Answer: The current weather in London is overcast with a temperature of 11°C and humidity of 94%. Based on this information, here is the image of Big Ben under an overcast sky: ![](https://image.pollinations.ai/prompt/Big%20Ben%20under%20overcast%20sky%20with%20temperature%2011%C2%B0C%20and%20humidity%2094%25) + diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index ac4c22ffc3b185..dab94c37d77a4c 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -97,15 +97,6 @@ Install required dependencies "transformers>=4.43.1" \ "onnx<=1.16.1; sys_platform=='win32'" "einops" "transformers_stream_generator" "tiktoken" "bitsandbytes" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import os @@ -199,8 +190,16 @@ several options for model weight compression:
+.. raw:: html + + + Click here to see available models options +.. raw:: html + + + - **tiny-llama-1b-chat** - This is the chat model finetuned on top of `TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T `__. The TinyLlama project aims to pretrain a 1.1B Llama model on 3 @@ -293,9 +292,10 @@ Click here to see available models options model can be found in `model card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must - be a registered user in Hugging Face Hub. Please visit `HuggingFace - model card `__, carefully - read terms of usage and click accept button. You will need to use an + be a registered user in Hugging Face Hub. Please visit + `HuggingFace model + card `__, carefully read + terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the documentation `__. @@ -387,9 +387,10 @@ Click here to see available models options model can be found in `model card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must - be a registered user in Hugging Face Hub. Please visit `HuggingFace - model card `__, carefully - read terms of usage and click accept button. You will need to use an + be a registered user in Hugging Face Hub. Please visit + `HuggingFace model + card `__, carefully read + terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the documentation `__. @@ -666,7 +667,7 @@ Click here to see available models options .. 
parsed-literal:: - Selected model qwen2.5-0.5b-instruct with INT4 compression + Selected model qwen2-0.5b-instruct with INT4 compression Convert model using Optimum-CLI tool @@ -674,8 +675,8 @@ Convert model using Optimum-CLI tool -`Optimum Intel `__ is -the interface between the +`Optimum Intel `__ +is the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -688,8 +689,16 @@ format.
+.. raw:: html + + + Click here to read more about Optimum CLI usage +.. raw:: html + + + The command bellow demonstrates basic command for model export with ``optimum-cli`` @@ -722,8 +731,16 @@ with the CLI.
+.. raw:: html + + + Click here to read more about weights compression with Optimum CLI +.. raw:: html + + + Setting ``--weight-format`` to respectively fp16, int8 or int4. This type of optimization allows to reduce the memory footprint and inference latency. By default the quantization scheme for int8/int4 will be @@ -766,47 +783,7 @@ be additionally applied during model export with INT4 precision using .. parsed-literal:: - ⌛ qwen2.5-0.5b-instruct conversion to INT4 started. It may takes some time. - - - -**Export command:** - - - -``optimum-cli export openvino --model Qwen/Qwen2.5-0.5B-Instruct --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 1.0 --sym qwen2.5/INT4_compressed_weights`` - - -.. parsed-literal:: - - 2024-10-08 02:53:02.359208: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:53:02.392956: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:53:02.929372: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Framework not specified. Using pt to export the model. - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> True - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/model_patcher.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if seq_len > self.max_seq_len_cached: - Set tokenizer padding side to left for `text-generation-with-past` task. - - -.. 
parsed-literal:: - - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 28% (1 / 169) │ 0% (0 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 72% (168 / 169) │ 100% (168 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:14 • 0:00:00 - ✅ INT4 qwen2.5-0.5b-instruct model converted and can be found in qwen2.5/INT4_compressed_weights + ✅ INT4 qwen2-0.5b-instruct model already converted and can be found in qwen2/INT4_compressed_weights Let’s compare model size for different compression types @@ -820,7 +797,7 @@ Let’s compare model size for different compression types .. parsed-literal:: - Size of model with INT4 compressed weights is 322.44 MB + Size of model with INT4 compressed weights is 358.86 MB Select device for inference @@ -891,10 +868,10 @@ of the available generation parameters more deeply later. .. parsed-literal:: - Loading model from qwen2.5/INT4_compressed_weights + Loading model from qwen2/INT4_compressed_weights Input text: The Sun is yellow bacause - of its coloration. The Sun is blue because + it is made of hydrogen and oxygen atoms. The Run Chatbot @@ -909,8 +886,16 @@ Now, when model created, we can setup Chatbot interface using
+.. raw:: html + + + Click here to see how pipeline works +.. raw:: html + + + The diagram below illustrates how the chatbot pipeline works .. figure:: https://github.com/user-attachments/assets/9c9b56e1-01a6-48d8-aa46-222a88e25066 @@ -963,8 +948,16 @@ Advanced generation options
+.. raw:: html + + + Click here to see detailed description of advanced options +.. raw:: html + + + | There are several parameters that can control text generation quality, \* ``Temperature`` is a parameter used to control the level of creativity in AI-generated text. By adjusting the ``temperature``, you @@ -1036,27 +1029,13 @@ Click here to see detailed description of advanced options demo = make_demo(pipe, model_configuration, model_id, lang.value) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) + demo.launch(debug=True, share=True) # If you are launching remotely, specify server_name and server_port # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index cbd76c0544ba82..0d214f5cccc0fc 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -83,9 +83,9 @@ Install required dependencies .. 
code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly @@ -104,12 +104,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -184,7 +184,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -217,7 +217,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -278,7 +278,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -311,7 +311,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -345,7 +345,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -379,7 +379,7 @@ Click here to see available models options .. 
code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -410,7 +410,7 @@ Click here to see available models options .. code:: python - ## login to huggingfacehub to get access to pretrained model + ## login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -545,14 +545,14 @@ Click here to see available models options .. code:: ipython3 model_languages = list(SUPPORTED_LLM_MODELS) - + model_language = widgets.Dropdown( options=model_languages, value=model_languages[0], description="Model Language:", disabled=False, ) - + model_language @@ -567,14 +567,14 @@ Click here to see available models options .. code:: ipython3 model_ids = list(SUPPORTED_LLM_MODELS[model_language.value]) - + model_id = widgets.Dropdown( options=model_ids, value=model_ids[0], description="Model:", disabled=False, ) - + model_id @@ -603,7 +603,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -655,12 +655,13 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. - The -``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : + +- The ``--group-size`` parameter will define the group size to use for + quantization, -1 it will results in per-column quantization. 
+- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. @@ -671,7 +672,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 from IPython.display import Markdown, display - + prepare_int4_model = widgets.Checkbox( value=True, description="Prepare INT4 model", @@ -687,7 +688,7 @@ sacrifice of the model size and inference latency. description="Prepare FP16 model", disabled=False, ) - + display(prepare_int4_model) display(prepare_int8_model) display(prepare_fp16_model) @@ -756,14 +757,14 @@ We can now save floating point and compressed model variants .. code:: ipython3 from pathlib import Path - + pt_model_id = model_configuration["model_id"] pt_model_name = model_id.value.split("-")[0] fp16_model_dir = Path(model_id.value) / "FP16" int8_model_dir = Path(model_id.value) / "INT8_compressed_weights" int4_model_dir = Path(model_id.value) / "INT4_compressed_weights" - - + + def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return @@ -775,8 +776,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return @@ -789,8 +790,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! 
$export_command - - + + def convert_to_int4(): compression_configs = { "zephyr-7b-beta": { @@ -865,7 +866,7 @@ We can now save floating point and compressed model variants "ratio": 0.8, }, } - + model_compression_params = compression_configs.get(model_id.value, compression_configs["default"]) if (int4_model_dir / "openvino_model.xml").exists(): return @@ -883,8 +884,8 @@ We can now save floating point and compressed model variants display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + if prepare_fp16_model.value: convert_to_fp16() if prepare_int8_model.value: @@ -899,7 +900,7 @@ Let’s compare model size for different compression types fp16_weights = fp16_model_dir / "openvino_model.bin" int8_weights = int8_model_dir / "openvino_model.bin" int4_weights = int4_model_dir / "openvino_model.bin" - + if fp16_weights.exists(): print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): @@ -925,16 +926,16 @@ Select device for inference and model variant .. 
code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -958,14 +959,14 @@ variant of model weights and inference device available_models.append("INT8") if fp16_model_dir.exists(): available_models.append("FP16") - + model_to_run = widgets.Dropdown( options=available_models, value=available_models[0], description="Model to run:", disabled=False, ) - + model_to_run @@ -1017,13 +1018,13 @@ guide `__ from transformers import AutoConfig, AutoTokenizer from optimum.intel.openvino import OVModelForCausalLM - + import openvino as ov import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - - + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -1031,22 +1032,22 @@ guide `__ else: model_dir = fp16_model_dir print(f"Loading model from {model_dir}") - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + if "GPU" in device.value and "qwen2-7b-instruct" in model_id.value: ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO" - + # On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there known accuracy # issues caused by this, which we avoid by setting precision hint to "f32". 
core = ov.Core() - + if model_id.value == "red-pajama-3b-chat" and "GPU" in core.available_devices and device.value in ["GPU", "AUTO"]: ov_config["INFERENCE_PRECISION_HINT"] = "f32" - + model_name = model_configuration["model_id"] tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) - + ov_model = OVModelForCausalLM.from_pretrained( model_dir, device=device.value, @@ -1120,14 +1121,14 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html :: - playing: 0.5 - sleeping: 0.25 - eating: 0.15 - driving: 0.05 - flying: 0.05 + playing: 0.5 + sleeping: 0.25 + eating: 0.15 + driving: 0.05 + flying: 0.05 - - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." - - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." + - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." + - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating." - **High temperature** (e.g., 2.0): The AI model becomes more adventurous, increasing the chances of selecting less likely tokens, such as "driving" and "flying." 
- ``Top-p``, also known as nucleus sampling, is a parameter used to @@ -1165,7 +1166,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html import torch from threading import Event, Thread - + from typing import List, Tuple from transformers import ( AutoTokenizer, @@ -1173,8 +1174,8 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html StoppingCriteriaList, TextIteratorStreamer, ) - - + + model_name = model_configuration["model_id"] start_message = model_configuration["start_message"] history_template = model_configuration.get("history_template") @@ -1182,46 +1183,46 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html current_message_template = model_configuration.get("current_message_template") stop_tokens = model_configuration.get("stop_tokens") tokenizer_kwargs = model_configuration.get("tokenizer_kwargs", {}) - + max_new_tokens = 256 - - + + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): self.token_ids = token_ids - + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_id in self.token_ids: if input_ids[0][-1] == stop_id: return True return False - - + + if stop_tokens is not None: if isinstance(stop_tokens[0], str): stop_tokens = tok.convert_tokens_to_ids(stop_tokens) - + stop_tokens = [StopOnTokens(stop_tokens)] - - + + def default_partial_text_processor(partial_text: str, new_text: str): """ helper for updating partially generated answer, used by default - + Params: partial_text: text buffer for storing previosly generated text new_text: text update for the current step Returns: updated text string - + """ partial_text += new_text return partial_text - - + + text_processor = model_configuration.get("partial_text_processor", default_partial_text_processor) - - + + def convert_history_to_token(history: List[Tuple[str, str]]): """ function for conversion history stored as list pairs of user and assistant 
messages to tokens according to model expected conversation template @@ -1255,7 +1256,7 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html messages.append({"role": "user", "content": user_msg}) if model_msg: messages.append({"role": "assistant", "content": model_msg}) - + input_token = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="pt") else: text = start_message + "".join( @@ -1276,12 +1277,12 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) input_token = tok(text, return_tensors="pt", **tokenizer_kwargs).input_ids return input_token - - + + def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id): """ callback function for running chatbot on submit button click - + Params: history: conversation history temperature: parameter for control the level of creativity in AI-generated text. @@ -1290,9 +1291,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. conversation_id: unique conversation identifier. 
- + """ - + # Construct the input message string for the model by concatenating the current system message and conversation history # Tokenize the messages string input_ids = convert_history_to_token(history) @@ -1312,9 +1313,9 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html ) if stop_tokens is not None: generate_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens) - + stream_complete = Event() - + def generate_and_signal_complete(): """ genration function for single thread @@ -1322,18 +1323,18 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html global start_time ov_model.generate(**generate_kwargs) stream_complete.set() - + t1 = Thread(target=generate_and_signal_complete) t1.start() - + # Initialize an empty string to store the generated text partial_text = "" for new_text in streamer: partial_text = text_processor(partial_text, new_text) history[-1][1] = partial_text yield history - - + + def request_cancel(): ov_model.request.cancel() @@ -1342,11 +1343,11 @@ answers.https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO {model_id.value} Chatbot", language=model_language.value) - + try: demo.launch() except Exception: diff --git a/docs/notebooks/llm-rag-langchain-with-output.rst b/docs/notebooks/llm-rag-langchain-with-output.rst index 935c4c5ef1f205..1dec9cb2fb6659 100644 --- a/docs/notebooks/llm-rag-langchain-with-output.rst +++ b/docs/notebooks/llm-rag-langchain-with-output.rst @@ -127,7 +127,8 @@ Install required dependencies "onnx<1.16.2", "einops", "transformers_stream_generator", - "tiktoken" "transformers>=4.43.1", + "tiktoken", + 
"transformers>=4.43.1", "faiss-cpu", "sentence_transformers", "langchain>=0.2.0", diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst b/docs/notebooks/magika-content-type-recognition-with-output.rst index 2fbe7e63a8b21b..3ef21583fa5807 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -41,7 +41,6 @@ post `__ diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index fbb69d83fe239a..c1317625880917 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -54,21 +54,13 @@ Guide =2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Import @@ -645,7 +637,7 @@ bounds of input batch size. .. parsed-literal:: - + diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index 2c5cdf1aecf169..98f1217902a587 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -61,9 +61,15 @@ Prerequisites .. parsed-literal:: - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators + numpy (>=1.19.*) ; python_version >= "3.7" + ~~~~~~~^ + Note: you may need to restart the kernel to use updated packages. 
+ WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators + numpy (>=1.19.*) ; python_version >= "3.7" + ~~~~~~~^ + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -107,7 +113,16 @@ details.
+.. raw:: html + + + Click here for more detailed explanation of conversion steps + +.. raw:: html + + + MiniCPM-V2.6 is autoregressive transformer generative model, it means that each next model step depends from model output from previous step. The generation approach is based on the assumption that the probability @@ -205,11 +220,12 @@ Let’s convert each model part. .. parsed-literal:: - 2024-10-08 02:54:38.009287: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 02:54:38.043246: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-10-07 09:57:53.402018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-10-07 09:57:53.403877: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. + 2024-10-07 09:57:53.440490: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 02:54:38.562064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - + 2024-10-07 09:57:54.270302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + .. code:: ipython3 @@ -222,157 +238,7 @@ Let’s convert each model part. 
⌛ openbmb/MiniCPM-V-2_6 conversion started. Be patient, it may takes some time. ⌛ Load Original model - - - -.. parsed-literal:: - - Fetching 24 files: 0%| | 0/24 [00:00 self.max_seq_len_cached: - + .. parsed-literal:: - ✅ Language model successfully converted - ⌛ Convert Image embedding model ✅ Image embedding model successfully converted - ⌛ Convert Resamler model - - -.. parsed-literal:: - - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:421: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert embed_dim == embed_dim_to_check, \ - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:428: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert head_dim * num_heads == embed_dim, f"embed_dim {embed_dim} not divisible by num_heads {num_heads}" - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:434: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert key.shape == value.shape, f"key shape {key.shape} does not match value shape {value.shape}" - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:520: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert key_padding_mask.shape == (bsz, src_len), \ - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/ckpt/resampler.py:539: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - q_scaled = q / math.sqrt(E) - - -.. parsed-literal:: - - ✅ Resampler model successfully converted ✅ openbmb/MiniCPM-V-2_6 model sucessfully converted. You can find results in MiniCPM-V-2_6 - + Compress Language Model Weights to 4 bits ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -459,12 +288,21 @@ applied using `NNCF `__.
-Click here for more details about weight compression Weight compression -aims to reduce the memory footprint of a model. It can also lead to -significant performance improvement for large memory-bound models, such -as Large Language Models (LLMs). LLMs and other models, which require -extensive memory to store the weights during inference, can benefit from -weight compression in the following ways: +.. raw:: html + + + +Click here for more details about weight compression + +.. raw:: html + + + +Weight compression aims to reduce the memory footprint of a model. It +can also lead to significant performance improvement for large +memory-bound models, such as Large Language Models (LLMs). LLMs and +other models, which require extensive memory to store the weights during +inference, can benefit from weight compression in the following ways: - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; @@ -547,33 +385,14 @@ documentation `__ @@ -74,11 +73,11 @@ Prerequisites .. parsed-literal:: Cloning into 'ml-mobileclip'... - remote: Enumerating objects: 84, done. - remote: Counting objects: 100% (84/84), done. - remote: Compressing objects: 100% (61/61), done. - remote: Total 84 (delta 29), reused 75 (delta 22), pack-reused 0 (from 0) - Unpacking objects: 100% (84/84), 467.39 KiB | 2.58 MiB/s, done. + remote: Enumerating objects: 95, done. + remote: Counting objects: 100% (95/95), done. + remote: Compressing objects: 100% (66/66), done. + remote: Total 95 (delta 38), reused 85 (delta 28), pack-reused 0 (from 0) + Unpacking objects: 100% (95/95), 469.11 KiB | 3.13 MiB/s, done. .. 
code:: ipython3 @@ -87,14 +86,13 @@ Prerequisites %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=1.13.1" "torchvision>=0.14.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" + %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -187,17 +185,17 @@ comparison purposes, you can select different models among: "image_size": 224, }, "clip-vit-b-16": { - "image_name": "ViT-B-16", + "model_name": "ViT-B-16", "pretrained": "openai", "image_size": 224, }, "clip-vit-l-14": { - "image_name": "ViT-L-14", + "model_name": "ViT-L-14", "pretrained": "datacomp_xl_s13b_b90k", "image_size": 224, }, "clip-vit-h-14": { - "image_name": "ViT-H-14", + "model_name": "ViT-H-14", "pretrained": "laion2b_s32b_b79k", "image_size": 224, }, @@ -408,6 +406,12 @@ preprocessing utilities tokenizer = open_clip.get_tokenizer(model_name) +.. 
parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) + + .. parsed-literal:: @@ -450,8 +454,8 @@ Perform search .. parsed-literal:: - Image encoding took 0.108 ms - Text encoding took 0.0118 ms + Image encoding took 0.114 ms + Text encoding took 0.0113 ms @@ -529,7 +533,7 @@ be used separately. Let’s convert each part to OpenVINO. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len != self.num_embeddings: @@ -614,8 +618,8 @@ Perform search .. 
parsed-literal:: - Image encoding took 0.0271 ms - Text encoding took 0.00495 ms + Image encoding took 0.0294 ms + Text encoding took 0.00498 ms diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output.rst b/docs/notebooks/mobilevlm-language-assistant-with-output.rst index 1ba06287ff485c..5902537e3026a5 100644 --- a/docs/notebooks/mobilevlm-language-assistant-with-output.rst +++ b/docs/notebooks/mobilevlm-language-assistant-with-output.rst @@ -67,9 +67,7 @@ Install requirements Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - optimum-intel 1.20.0.dev0+542347b requires transformers<4.46,>=4.36, but you have transformers 4.33.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -121,13 +119,13 @@ Import required packages .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - 2024-10-08 03:04:16.549795: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:04:16.584461: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:02:06.143728: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:02:06.177889: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:04:17.090418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-11-05 02:02:06.679118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. 
torch.utils._pytree._register_pytree_node( @@ -344,15 +342,15 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): @@ -372,13 +370,13 @@ compression instead of INT8 weight compression. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 24% (43 / 169) │ 20% (42 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 76% (126 / 169) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 24% (43 / 169) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 76% (126 / 169) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -418,7 +416,7 @@ compression instead of INT8 weight compression. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -438,13 +436,13 @@ compression instead of INT8 weight compression. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 28% (44 / 170) │ 20% (42 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 72% (126 / 170) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 28% (44 / 170) │ 20% (42 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 72% (126 / 170) │ 80% (126 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git 
a/docs/notebooks/model-server-with-output.rst b/docs/notebooks/model-server-with-output.rst index dc6c9e966cf462..d5a9347a46e807 100644 --- a/docs/notebooks/model-server-with-output.rst +++ b/docs/notebooks/model-server-with-output.rst @@ -181,14 +181,7 @@ following rules: .. code:: ipython3 - import platform - - %pip install -q "openvino>=2023.1.0" opencv-python tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.4.0" opencv-python tqdm "matplotlib>=3.4" .. code:: ipython3 diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index d1fc70cca19a6d..4adc89b9ff79e7 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,13 +124,13 @@ Imports .. parsed-literal:: - 2024-10-08 03:06:32.000424: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:06:32.034271: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:04:23.419260: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:04:23.453089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:06:32.663477: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + 2024-11-05 02:04:24.059462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -170,11 +170,11 @@ generate a text-conditioned music sample. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -229,7 +229,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -655,7 +655,7 @@ We can now infer the pipeline backed by OpenVINO models. diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 78700602513056..337458e35bbf0c 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -15,8 +15,8 @@ OpenVINO. Additionally, we will optimize model using - `Prerequisites <#prerequisites>`__ -- `Load PyTorch model <#load-pytorch-model>`__ -- `Run PyTorch Model Inference <#run-pytorch-model-inference>`__ +- `Select Model <#select-model>`__ +- `Download PyTorch model <#download-pytorch-model>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ - `Convert model to OpenVINO IR @@ -24,7 +24,6 @@ OpenVINO. Additionally, we will optimize model using - `Compress Model weights to 4 and 8 bits using NNCF <#compress-model-weights-to-4-and-8-bits-using-nncf>`__ - `Image Encoder <#image-encoder>`__ - - `Text Embeddings <#text-embeddings>`__ - `Language Model <#language-model>`__ - `Prepare model inference @@ -52,252 +51,222 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "openvino>=2024.1.0" "tqdm" "nncf>=2.10" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.13" + %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly "openvino-tokenizers[transformers]" "openvino>=2024.4.0" + %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. + mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + openvino-genai 2024.4.0.0 requires openvino_tokenizers~=2024.4.0.0.dev, but you have openvino-tokenizers 2024.5.0.0.dev20241022 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. .. 
code:: ipython3 - from huggingface_hub import snapshot_download from pathlib import Path - - model_local_dir = Path("nanoLLaVA") - - if not model_local_dir.exists(): - snapshot_download(repo_id="qnguyen3/nanoLLaVA", local_dir=model_local_dir) - - modeling_file = model_local_dir / "modeling_llava_qwen2.py" - orig_modeling_file = model_local_dir / f"orig_{modeling_file.name}" - - - # model code depends from flash_attn package that may be problematic to load. Patch model code for avoiding import of this package - if not orig_modeling_file.exists(): - modeling_file.rename(orig_modeling_file) - with orig_modeling_file.open("r") as f: - content = f.read() - replacement_lines = [ - ("from flash_attn import flash_attn_func, flash_attn_varlen_func", ""), - ("from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input", ""), - (' _flash_supports_window_size = "window_size" in list(inspect.signature(flash_attn_func).parameters)', "pass"), - ] - - for replace_pair in replacement_lines: - content = content.replace(*replace_pair) - - with modeling_file.open("w") as f: - f.write(content) - - + import requests -.. parsed-literal:: + helper_file = Path("ov_nano_llava_helper.py") - Fetching 14 files: 0%| | 0/14 [00:00`__ +- `nanoLLaVA-1.5 `__ +You can select one from the provided options below. -.. parsed-literal:: +.. code:: ipython3 - configuration_llava_qwen2.py: 0%| | 0.00/8.87k [00:00`__ +library. For convenience, we will use OpenVINO integration with +HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. 
``optimum-cli`` provides command line interface for +model conversion and optimization. -Load PyTorch model ------------------- +General command format: +.. code:: bash + optimum-cli export openvino --model --task -For creating PyTorch model we should use ``from_pretrained`` method of -``AutoModelForCausalLM`` model class. Model weights are already -downloaded from HuggingFace hub using ``snapshot_download`` function on -previous step. +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4, +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. .. code:: ipython3 - import transformers - from transformers import AutoModelForCausalLM, AutoTokenizer - from PIL import Image - import warnings - - transformers.logging.set_verbosity_error() - warnings.filterwarnings("ignore") - - model = AutoModelForCausalLM.from_pretrained(model_local_dir, trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained(model_local_dir, trust_remote_code=True) + if not converted_model_exists(ov_model_dir): + !optimum-cli export openvino --model {model_id} --task image-text-to-text --trust-remote-code --weight-format fp16 {ov_model_dir} .. parsed-literal:: - 2024-10-08 03:11:17.270186: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
- 2024-10-08 03:11:17.304136: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:09:06.532025: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:09:06.565023: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:11:18.027701: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Run PyTorch Model Inference ---------------------------- - - - -.. code:: ipython3 - - import torch - import requests - - prompt = "Describe this image in detail" - - messages = [{"role": "user", "content": f"\n{prompt}"}] - text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - - text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("")] - input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0) - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" - image = Image.open(requests.get(url, stream=True).raw) - image_tensor = model.process_images([image], model.config) - print(prompt) - image - - -.. 
parsed-literal:: - - Describe this image in detail - - + 2024-11-05 02:09:07.173118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Some weights of the model checkpoint at qnguyen3/nanoLLaVA were not used when initializing LlavaQwen2ForCausalLM: ['model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.bias', 'model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight', 'model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight', 'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias', 
'model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight', 'model.vision_tower.vision_tower.vision_model.head.attention.in_proj_bias', 'model.vision_tower.vision_tower.vision_model.head.attention.in_proj_weight', 'model.vision_tower.vision_tower.vision_model.head.attention.out_proj.bias', 'model.vision_tower.vision_tower.vision_model.head.attention.out_proj.weight', 'model.vision_tower.vision_tower.vision_model.head.layernorm.bias', 'model.vision_tower.vision_tower.vision_model.head.layernorm.weight', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc1.bias', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc1.weight', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc2.bias', 'model.vision_tower.vision_tower.vision_model.head.mlp.fc2.weight', 'model.vision_tower.vision_tower.vision_model.head.probe', 'model.vision_tower.vision_tower.vision_model.post_layernorm.bias', 'model.vision_tower.vision_tower.vision_model.post_layernorm.weight'] + - This IS expected if you are initializing LlavaQwen2ForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). + - This IS NOT expected if you are initializing LlavaQwen2ForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:169: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:187: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:116: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + if past_key_values_length > 0: + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if seq_len > self.max_seq_len_cached: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + OpenVINO and OpenVINO Tokenizers versions are not binary compatible. + OpenVINO version: 2024.5.0-16993 + OpenVINO Tokenizers version: 2024.5.0.0 + First 3 numbers should be the same. Update OpenVINO Tokenizers to compatible version. It is recommended to use the same day builds for pre-release version. 
To install both OpenVINO and OpenVINO Tokenizers release version perform: + pip install --force-reinstall openvino openvino-tokenizers + To update both OpenVINO and OpenVINO Tokenizers to the latest pre-release version perform: + pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + Tokenizer won't be converted. + Traceback (most recent call last): + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in + sys.exit(main()) + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main + service.run() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run + main_export( + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export + core = Core() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", line 53, in new_core_init + self.add_extension(str(_ext_path)) # Core.add_extension doesn't support Path object + RuntimeError: Exception from src/inference/src/cpp/core.cpp:158: + Cannot add extension. Cannot find entry point to the extension library. 
This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE -.. image:: nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png - - - -.. code:: ipython3 - - from transformers import TextStreamer - - streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - output_ids = model.generate(input_ids, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) - - -.. parsed-literal:: - - The image features a white, fluffy lamb, likely a lama, in the midst of a fire. The lamb's fluffy fur is a mix of white and black, and it has a unique pattern of black spots on its body. The lamb's eyes are a bright shade of blue, and its ears are also white. The lamb's mouth is open, revealing pink lips, adding a playful touch to its overall appearance. - The lamb's face is quite detailed, with features such as a small black eye, a small nose, and a black mouth. The lamb's face is also quite expressive, with its mouth open, revealing pink lips - - -Convert and Optimize model --------------------------- - - - -Our model conversion and optimization consist of following steps: 1. -Convert model to OpenVINO format and save it on disk. 2. Compress model -weights using NNCF - -Let’s consider each step more deeply. - -Convert model to OpenVINO IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -Convert model to OpenVINO format using conversion helper function -defined bellow. 
We will use `OpenVINO Model Conversion -API `__ -for conversion PyTorch model to OpenVINO Intermediate Representation -format. ``ov.convert_model`` function accepts PyTorch model instance and -example input for tracing and returns ready to use OpenVINO Model object -that can be compiled on device using ``core.compile_model`` or saved on -disk for next usage with help ``ov.save_model`` function. Depends from -generation step, model accepts different inputs and activates different -parts of pipeline. For preserving the same level of flexibility, we will -split model on parts: Image Encoder, Text Embeddings, Language Model and -convert each part separately. Compress Model weights to 4 and 8 bits using NNCF -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -337,22 +306,20 @@ prediction quality. More details about weights compression, can be found in `OpenVINO documentation `__. - **Note**: There is no speedup for INT4 compressed models on dGPU. - Please select below whether you would like to run INT4 weight compression instead of INT8 weight compression. .. code:: ipython3 import ipywidgets as widgets - + compression_mode = widgets.Dropdown( options=["INT4", "INT8"], value="INT4", description="Compression mode:", disabled=False, ) - + compression_mode @@ -366,101 +333,25 @@ compression instead of INT8 weight compression. .. 
code:: ipython3 - import gc - import warnings - import torch - import openvino as ov import nncf - from typing import Optional, Tuple - - warnings.filterwarnings("ignore") - - - def flattenize_inputs(inputs): - """ - Helper function for making nested inputs flattens - """ - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def postprocess_converted_model( - ov_model, - example_input=None, - input_names=None, - output_names=None, - dynamic_shapes=None, - ): - """ - Helper function for appling postprocessing on converted model with updating input names, shapes and output names - acording to requested specification - """ - flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] - - if input_names: - for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): - input_node = m_input.get_node() - if input_node.element_type == ov.Type.dynamic: - m_input.get_node().set_element_type(ov.Type.f32) - shape = list(input_data.shape) - if dynamic_shapes is not None and inp_name in dynamic_shapes: - for k in dynamic_shapes[inp_name]: - shape[k] = -1 - input_node.set_partial_shape(ov.PartialShape(shape)) - m_input.get_tensor().set_names({inp_name}) - - if output_names: - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - ov_model.validate_nodes_and_infer_types() - return ov_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino - + import openvino as ov -.. code:: ipython3 + core = ov.Core() if compression_mode.value == "INT4": - ov_out_path = Path("ov_nanollava/INT4_compressed_weights") + ov_compressed_model_dir = ov_model_dir.parent / "INT4" llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) else: - ov_out_path = Path("ov_nanollava/INT8_compressed_weights") + ov_compressed_model_dir = ov_model_dir.parent / "INT8" llava_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - + image_encoder_wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - - ov_out_path.mkdir(exist_ok=True, parents=True) - model.config.save_pretrained(ov_out_path) - vision_tower = model.get_vision_tower() - if not vision_tower.is_loaded: - vision_tower.load_model() - - image_encoder_path = ov_out_path / "image_encoder.xml" - token_embedding_model_path = ov_out_path / "token_embed.xml" - model_path = ov_out_path / "llava_with_past.xml" - - model.eval() - model.config.use_cache = True - model.config.torchscript = True + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + Image Encoder ~~~~~~~~~~~~~ @@ -469,60 +360,42 @@ Image Encoder Image Encoder is represented in nanoLLaVA by pretrained SigLIP model. Image encoder is responsible for encoding input images into embedding -space. +space. Code bellow demonstrates how to apply weights compression for +image encoder model. .. 
code:: ipython3 - if not image_encoder_path.exists(): - model.forward = model.encode_images - with torch.no_grad(): - ov_model = ov.convert_model( - model, - example_input=torch.zeros((1, 3, 384, 384)), - input=[(-1, 3, 384, 384)], - ) - if image_encoder_wc_parameters is not None: - print("Applying weight compression to image encoder") - ov_model = nncf.compress_weights(ov_model, **image_encoder_wc_parameters) - ov.save_model(ov_model, image_encoder_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Image Encoder model successfully converted") - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - - -.. parsed-literal:: + import gc - WARNING:nncf:NNCF provides best results with torch==2.4.*, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.4.* + compressed_vision_encoder_path = ov_compressed_model_dir / "openvino_vision_embeddings_model.xml" + vision_encoder_path = ov_model_dir / "openvino_vision_embeddings_model.xml" + if not compressed_vision_encoder_path.exists(): + ov_vision_encoder = core.read_model(vision_encoder_path) + ov_compressed_vision_encoder = nncf.compress_weights(ov_vision_encoder, **image_encoder_wc_parameters) + ov.save_model(ov_compressed_vision_encoder, compressed_vision_encoder_path) + del ov_compressed_vision_encoder + del ov_vision_encoder + gc.collect(); .. parsed-literal:: - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... 
- To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. + warning_deprecated( + 2024-11-05 02:09:38.791476: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:09:38.825207: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-11-05 02:09:39.427301: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. 
parsed-literal:: - Applying weight compression to image encoder INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (159 / 159) │ 100% (159 / 159) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (159 / 159) │ 100% (159 / 159) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -538,39 +411,6 @@ space. -.. parsed-literal:: - - Image Encoder model successfully converted - - -Text Embeddings -~~~~~~~~~~~~~~~ - - - -In LLMs, input embedding is a part of language model, but for LLaVA the -first step hidden state produced by this model part should be integrated -with image embeddings into common embedding space. For ability to reuse -this model part and avoid introduction of extra llm model instance, we -will use it separately. - -.. code:: ipython3 - - if not token_embedding_model_path.exists(): - with torch.no_grad(): - ov_model = ov.convert_model(model.get_model().embed_tokens, example_input=torch.ones((1, 10), dtype=torch.long)) - ov.save_model(ov_model, token_embedding_model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - print("Token Embedding model successfully converted") - - -.. 
parsed-literal:: - - Token Embedding model successfully converted - - Language Model ~~~~~~~~~~~~~~ @@ -579,71 +419,22 @@ Language Model Language Model is responsible for generation answer in LLaVA. This part is very similar to standard LLM for text generation. Our model uses `Qwen/Qwen1.5-0.5B `__ as base -LLM. To optimize the generation process and use memory more efficiently, -HuggingFace transformers API provides a mechanism for caching model -state externally using ``use_cache=True`` parameter and -``past_key_values`` argument in inputs and outputs. With the cache, the -model saves the hidden state once it has been computed. The model only -computes the one for the most recently generated output token at each -time step, re-using the saved ones for hidden tokens. This reduces the -generation complexity from :math:`O(n^3)` to :math:`O(n^2)` for a -transformer model. With this option, the model gets the previous step’s -hidden states (cached attention keys and values) as input and -additionally provides hidden states for the current step as output. It -means for all next iterations, it is enough to provide only a new token -obtained from the previous step and cached key values to get the next -token prediction. +LLM. .. 
code:: ipython3 - if not model_path.exists(): - model.forward = super(type(model), model).forward - example_input = {"attention_mask": torch.ones([2, 10], dtype=torch.int64), "position_ids": torch.tensor([[8, 9], [8, 9]], dtype=torch.int64)} - - dynamic_shapes = { - "input_embeds": {0: "batch_size", 1: "seq_len"}, - "attention_mask": {0: "batch_size", 1: "prev_seq_len + seq_len"}, - "position_ids": {0: "batch_size", 1: "seq_len"}, - } - input_embeds = torch.zeros((2, 2, model.config.hidden_size)) - - input_names = ["attention_mask", "position_ids"] - output_names = ["logits"] - - past_key_values = [] - for i in range(model.config.num_hidden_layers): - kv = [torch.randn([2, model.config.num_key_value_heads, 8, model.config.hidden_size // model.config.num_attention_heads]) for _ in range(2)] - past_key_values.append(kv) - input_names.extend([f"past_key_values.{i}.key", f"past_key_values.{i}.value"]) - output_names.extend([f"present.{i}.key", f"present.{i}.value"]) - dynamic_shapes[input_names[-2]] = {0: "batch_size", 2: "seq_len"} - dynamic_shapes[input_names[-1]] = {0: "batch_size", 2: "seq_len"} - - example_input["past_key_values"] = past_key_values - example_input["inputs_embeds"] = input_embeds - input_names.append("inputs_embeds") - dynamic_shapes["inputs_embeds"] = {0: "batch_size", 1: "seq_len"} - ov_model = ov.convert_model(model, example_input=example_input) - ov_model = postprocess_converted_model( - ov_model, example_input=example_input.values(), input_names=input_names, output_names=output_names, dynamic_shapes=dynamic_shapes - ) - - if llava_wc_parameters is not None: - print("Applying weight compression to second stage LLava model") - ov_model = nncf.compress_weights(ov_model, **llava_wc_parameters) - ov.save_model(ov_model, model_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - print("LLaVA model successfully converted") - del model - gc.collect(); - + compressed_llm_path = ov_compressed_model_dir / "openvino_language_model.xml" + 
llm_path = ov_model_dir / "openvino_language_model.xml" -.. parsed-literal:: + if not compressed_llm_path.exists(): + ov_llm = core.read_model(llm_path) + ov_compressed_llm = nncf.compress_weights(ov_llm, **llava_wc_parameters) + ov.save_model(ov_compressed_llm, compressed_llm_path) + del ov_compressed_llm + del ov_llm + gc.collect() - Applying weight compression to second stage LLava model + copy_model_files(ov_model_dir, ov_compressed_model_dir) @@ -662,13 +453,13 @@ token prediction. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 47% (48 / 169) │ 20% (47 / 168) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 53% (121 / 169) │ 80% (121 / 168) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 47% (48 / 169) │ 20% (47 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 53% (121 / 169) │ 80% (121 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -684,365 +475,24 @@ token prediction. -.. parsed-literal:: - - LLaVA model successfully converted - - Prepare model inference pipeline -------------------------------- -``OVLlavaQwen2ForCausalLM`` class provides ease-to-use interface for -using model in generation scenario. 
It is based on -``transformers.generation.GenerationMixin`` that gives us opportunity to -reuse all reach capabilities for generation implemented in HuggingFace -Transformers library. More details about this interface can be found in -`HuggingFace -documentation `__. - -.. code:: ipython3 - - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - from transformers.image_processing_utils import BatchFeature, get_size_dict - from transformers.image_transforms import ( - convert_to_rgb, - normalize, - rescale, - resize, - to_channel_dimension_format, - ) - from transformers.image_utils import ( - ChannelDimension, - PILImageResampling, - to_numpy_array, - ) - import numpy as np - import torch - from typing import Dict - from functools import partial, reduce - - IGNORE_INDEX = -100 - IMAGE_TOKEN_INDEX = -200 - - - class ImageProcessor: - def __init__( - self, - image_mean=(0.5, 0.5, 0.5), - image_std=(0.5, 0.5, 0.5), - size=(384, 384), - crop_size: Dict[str, int] = None, - resample=PILImageResampling.BICUBIC, - rescale_factor=1 / 255, - data_format=ChannelDimension.FIRST, - ): - crop_size = crop_size if crop_size is not None else {"height": 384, "width": 384} - crop_size = get_size_dict(crop_size, default_to_square=True, param_name="crop_size") - - self.image_mean = image_mean - self.image_std = image_std - self.size = size - self.resample = resample - self.rescale_factor = rescale_factor - self.data_format = data_format - self.crop_size = crop_size - - def preprocess(self, images, return_tensors): - if isinstance(images, Image.Image): - images = [images] - else: - assert isinstance(images, list) - - transforms = [ - convert_to_rgb, - to_numpy_array, - partial(resize, size=self.size, resample=self.resample, data_format=self.data_format), - partial(rescale, scale=self.rescale_factor, data_format=self.data_format), - partial(normalize, 
mean=self.image_mean, std=self.image_std, data_format=self.data_format), - partial(to_channel_dimension_format, channel_dim=self.data_format, input_channel_dim=self.data_format), - ] - - images = reduce(lambda x, f: [*map(f, x)], transforms, images) - data = {"pixel_values": images} - - return BatchFeature(data=data, tensor_type=return_tensors) - - - class OVLlavaQwen2ForCausalLM(GenerationMixin): - def __init__(self, core, model_dir, device): - self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) - self.embed_tokens = core.compile_model(model_dir / "token_embed.xml", device) - self.model = core.read_model(model_dir / "llava_with_past.xml") - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - compiled_model = core.compile_model(self.model, device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(model_dir) - self.generation_config = GenerationConfig.from_model_config(self.config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self.image_processor = ImageProcessor() - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, position_ids, past_key_values) - - def 
forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = self.prepare_inputs_for_multimodal(input_ids, position_ids, attention_mask, past_key_values, images) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def prepare_inputs_for_multimodal(self, input_ids, position_ids, attention_mask, past_key_values, images): - inputs = {} - if past_key_values is None: - past_key_values = self._dummy_past_key_values(input_ids.shape[0]) - else: - past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - inputs.update(zip(self.key_value_input_names, past_key_values)) - - if images is None or input_ids.shape[1] == 1: - target_shape = past_key_values[-1][-1].shape[-2] + 1 if past_key_values is not None else input_ids.shape[1] - attention_mask = torch.cat( - ( - attention_mask, - torch.ones((attention_mask.shape[0], target_shape - attention_mask.shape[1]), dtype=attention_mask.dtype, device=attention_mask.device), - ), - dim=1, - ) - position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1 - 
inputs_embeds = self.embed_tokens(input_ids)[0] - inputs["attention_mask"] = attention_mask.numpy() - inputs["position_ids"] = position_ids.numpy() - inputs["inputs_embeds"] = inputs_embeds - - return inputs - - if type(images) is list or images.ndim == 5: - concat_images = torch.cat([image for image in images], dim=0) - image_features = self.encode_images(concat_images) - split_sizes = [image.shape[0] for image in images] - image_features = torch.split(image_features, split_sizes, dim=0) - image_features = [x.flatten(0, 1).to(self.device) for x in image_features] - else: - image_features = self.encode_images(images).to(self.device) - - # Let's just add dummy tensors if they do not exist, - # it is a headache to deal with None all the time. - # But it is not ideal, and if you have a better idea, - # please open an issue / submit a PR, thanks. - labels = None - _attention_mask = attention_mask - if attention_mask is None: - attention_mask = torch.ones_like(input_ids, dtype=torch.bool) - else: - attention_mask = attention_mask.bool() - if position_ids is None: - position_ids = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device) - if labels is None: - labels = torch.full_like(input_ids, IGNORE_INDEX) - - # remove the padding using attention_mask -- TODO: double check - input_ids = [cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in zip(input_ids, attention_mask)] - labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)] - - new_input_embeds = [] - new_labels = [] - cur_image_idx = 0 - for batch_idx, cur_input_ids in enumerate(input_ids): - num_images = (cur_input_ids == IMAGE_TOKEN_INDEX).sum() - if num_images == 0: - cur_image_features = image_features[cur_image_idx] - cur_input_embeds_1 = self.embed_tokens(cur_input_ids) - cur_input_embeds = torch.cat([cur_input_embeds_1, cur_image_features[0:0]], dim=0) - new_input_embeds.append(cur_input_embeds) - 
new_labels.append(labels[batch_idx]) - cur_image_idx += 1 - continue - - image_token_indices = [-1] + torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0].tolist() + [cur_input_ids.shape[0]] - cur_input_ids_noim = [] - cur_labels = labels[batch_idx] - cur_labels_noim = [] - for i in range(len(image_token_indices) - 1): - cur_input_ids_noim.append(cur_input_ids[image_token_indices[i] + 1 : image_token_indices[i + 1]]) - cur_labels_noim.append(cur_labels[image_token_indices[i] + 1 : image_token_indices[i + 1]]) - split_sizes = [x.shape[0] for x in cur_labels_noim] - cur_input_embeds = torch.from_numpy(self.embed_tokens(torch.cat(cur_input_ids_noim).unsqueeze(0))[0])[0] - cur_input_embeds_no_im = torch.split(cur_input_embeds, split_sizes, dim=0) - cur_new_input_embeds = [] - cur_new_labels = [] - - for i in range(num_images + 1): - cur_new_input_embeds.append(cur_input_embeds_no_im[i]) - cur_new_labels.append(cur_labels_noim[i]) - if i < num_images: - cur_image_features = image_features[cur_image_idx] - cur_image_idx += 1 - cur_new_input_embeds.append(cur_image_features) - cur_new_labels.append(torch.full((cur_image_features.shape[0],), IGNORE_INDEX, device=cur_labels.device, dtype=cur_labels.dtype)) - - cur_new_input_embeds = torch.cat(cur_new_input_embeds) - cur_new_labels = torch.cat(cur_new_labels) - - new_input_embeds.append(cur_new_input_embeds) - new_labels.append(cur_new_labels) - - # Truncate sequences to max length as image embeddings can make the sequence longer - tokenizer_model_max_length = getattr(self.config, "tokenizer_model_max_length", None) - if tokenizer_model_max_length is not None: - new_input_embeds = [x[:tokenizer_model_max_length] for x in new_input_embeds] - new_labels = [x[:tokenizer_model_max_length] for x in new_labels] - - # Combine them - max_len = max(x.shape[0] for x in new_input_embeds) - batch_size = len(new_input_embeds) - - new_input_embeds_padded = [] - new_labels_padded = torch.full((batch_size, max_len), IGNORE_INDEX, 
dtype=new_labels[0].dtype, device=new_labels[0].device) - attention_mask = torch.zeros((batch_size, max_len), dtype=attention_mask.dtype, device=attention_mask.device) - position_ids = torch.zeros((batch_size, max_len), dtype=position_ids.dtype, device=position_ids.device) - - for i, (cur_new_embed, cur_new_labels) in enumerate(zip(new_input_embeds, new_labels)): - cur_len = cur_new_embed.shape[0] - if getattr(self.config, "tokenizer_padding_side", "right") == "left": - new_input_embeds_padded.append( - torch.cat( - (torch.zeros((max_len - cur_len, cur_new_embed.shape[1]), dtype=cur_new_embed.dtype, device=cur_new_embed.device), cur_new_embed), dim=0 - ) - ) - if cur_len > 0: - new_labels_padded[i, -cur_len:] = cur_new_labels - attention_mask[i, -cur_len:] = True - position_ids[i, -cur_len:] = torch.arange(0, cur_len, dtype=position_ids.dtype, device=position_ids.device) - else: - new_input_embeds_padded.append( - torch.cat( - (cur_new_embed, torch.zeros((max_len - cur_len, cur_new_embed.shape[1]), dtype=cur_new_embed.dtype, device=cur_new_embed.device)), dim=0 - ) - ) - if cur_len > 0: - new_labels_padded[i, :cur_len] = cur_new_labels - attention_mask[i, :cur_len] = True - position_ids[i, :cur_len] = torch.arange(0, cur_len, dtype=position_ids.dtype, device=position_ids.device) - - new_input_embeds = torch.stack(new_input_embeds_padded, dim=0) - attention_mask = attention_mask.to(dtype=_attention_mask.dtype) - inputs["inputs_embeds"] = new_input_embeds.numpy() - inputs["attention_mask"] = attention_mask.numpy() - inputs["position_ids"] = position_ids.numpy() - - return inputs - - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): - """ - This function is used during running GenerationMixin.generate for preparing model specific inputs for - each generation step - """ - past_len = 0 - if past_key_values is not None: - input_ids = input_ids[:, -1].unsqueeze(-1) - past_len = past_key_values[-1][-1].shape[-2] - attention_mask = 
kwargs.get( - "attention_mask", - torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), - ) - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "position_ids": kwargs.get("position_ids", None), - "past_key_values": past_key_values, - "images": kwargs.get("images", None), - } - - def _reorder_cache(self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or - [`~PreTrainedModel.beam_sample`] is called. - This is required to match `past_key_values` with the correct beam_idx at every generation step. - """ - - # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache - return tuple(tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) for layer_past in past_key_values) - - def _dummy_past_key_values(self, batch_size): - pkv = [] - for input_name in self.key_value_input_names: - input_t = self.model.input(input_name) - input_shape = self.model.input(input_name).get_partial_shape() - input_shape[0] = batch_size - input_shape[2] = 0 - pkv.append(ov.Tensor(input_t.get_element_type(), input_shape.get_shape())) - - return pkv - - def encode_images(self, images): - return torch.from_numpy(self.image_encoder(images)[0]) - - def expand2square(self, pil_img, background_color): - width, height = pil_img.size - if width == height: - return pil_img - elif width > height: - result = Image.new(pil_img.mode, (width, width), background_color) - result.paste(pil_img, (0, (width - height) // 2)) - return result - else: - result = Image.new(pil_img.mode, (height, height), background_color) - result.paste(pil_img, ((height - width) // 2, 0)) - return result - - def process_images(self, images, model_cfg): - image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None) - new_images = [] - if image_aspect_ratio == "pad": - for image in images: - image = 
self.expand2square(image, tuple(int(x * 255) for x in self.image_processor.image_mean)) - image = self.image_processor.preprocess(image, return_tensors="pt")["pixel_values"][0] - new_images.append(image) - else: - return self.image_processor(images, return_tensors="pt")["pixel_values"] - if all(x.shape == new_images[0].shape for x in new_images): - new_images = torch.stack(new_images, dim=0) - return new_images +OpenVINO integration with Optimum Intel provides ready-to-use API for +model inference that can be used for smooth integration with +transformers-based solutions. For loading pixtral model, we will use +``OVModelForVisualCausalLM`` class that have compatible interface with +Transformers Pixtral implementation. For loading a model, +``from_pretrained`` method should be used. It accepts path to the model +directory or model_id from HuggingFace hub (if model is not converted to +OpenVINO format, conversion will be triggered automatically). +Additionally, we can provide an inference device, quantization config +(if model has not been quantized yet) and device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in +`documentation `__. Run OpenVINO Model Inference ---------------------------- @@ -1057,16 +507,16 @@ Select device .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget("CPU", exclude=["NPU"]) - + device @@ -1078,22 +528,66 @@ Select device +Optimum Intel provides Transformers-like interface for inference +OpenVINO models that allows smooth integration into user application, +where you need just replace model class, other parts of pipeline - +preprocessing and postprocessing code remains the same. 
It means that we +can use the same tokenizer and image processor that provided with model. + .. code:: ipython3 - core = ov.Core() - - ov_model = OVLlavaQwen2ForCausalLM(core, ov_out_path, device.value) + from optimum.intel.openvino import OVModelForVisualCausalLM + from transformers import AutoConfig, AutoTokenizer, AutoProcessor, TextStreamer + + # prepare tokenizer + tokenizer = AutoTokenizer.from_pretrained(ov_compressed_model_dir, trust_remote_code=True) + + # prepare image processor + config = AutoConfig.from_pretrained(ov_compressed_model_dir, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(config.mm_vision_tower) + + # initialize OpenVINO model inference class + ov_model = OVModelForVisualCausalLM.from_pretrained(ov_compressed_model_dir, device=device.value, trust_remote_code=True) .. code:: ipython3 + from ov_nano_llava_helper import process_images, process_text_input + from PIL import Image + + prompt = "Describe this image in detail" + + messages = [{"role": "user", "content": f"\n{prompt}"}] + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" + image = Image.open(requests.get(url, stream=True).raw) + image_tensor = process_images(image, None, processor) + input_ids, attention_mask = process_text_input(text, tokenizer) + streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - output_ids = ov_model.generate(input_ids, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + + display(image) + print(f"Question:\n{prompt}") + print("Answer:") + + output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + + + +.. image:: nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png .. 
parsed-literal:: - The image features a white, fluffy lamb with a playful, cheerful expression. The lamb is positioned in the center of the image, and it appears to be in motion, as if it's running. The lamb's face is white and it has a cute, adorable expression. It has a pair of bright, black eyes that are wide open, and it has a small, pink nose. The lamb's ears are also white and are quite large. The lamb's legs are white and are positioned behind it. The lamb's tail is also white and is quite long. The lamb's body is fluffy and it covers a large portion of the + Setting `pad_token_id` to `eos_token_id`:None for open-end generation. + + +.. parsed-literal:: + + Question: + Describe this image in detail + Answer: + The image features a small, adorable white lamb standing amidst a fire. The lamb's fur is fluffy and white, and it is adorned with tiny black eyes that are bright and lively. The lamb's face is cute, with a small black nose and a small mouth. It seems like the lamb is looking straight at the camera, making it appear even more adorable. + The lamb's right ear is visible, and it is white and pink. The lamb's right eye is also black and pink. The lamb's face is quite detailed, with the nose and mouth visible. There are also details like the lamb's right foot, which is white Interactive demo @@ -1103,11 +597,11 @@ Interactive demo .. 
code:: ipython3 - import time from transformers import TextIteratorStreamer, StoppingCriteria from threading import Thread - - + import torch + + class KeywordsStoppingCriteria(StoppingCriteria): def __init__(self, keywords, tokenizer, input_ids): self.keywords = keywords @@ -1122,7 +616,7 @@ Interactive demo self.keyword_ids.append(torch.tensor(cur_keyword_ids)) self.tokenizer = tokenizer self.start_len = input_ids.shape[1] - + def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len) self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids] @@ -1135,14 +629,14 @@ Interactive demo if keyword in outputs: return True return False - + def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: outputs = [] for i in range(output_ids.shape[0]): outputs.append(self.call_for_batch(output_ids[i].unsqueeze(0), scores)) return all(outputs) - - + + def bot_streaming(message, history): messages = [] if message["files"]: @@ -1151,7 +645,7 @@ Interactive demo for _, hist in enumerate(history): if isinstance(hist[0], tuple): image = hist[0][0] - + if len(history) > 0 and image is not None: messages.append({"role": "user", "content": f"\n{history[1][0]}"}) messages.append({"role": "assistant", "content": history[1][1]}) @@ -1172,29 +666,32 @@ Interactive demo messages.append({"role": "user", "content": f"\n{message['text']}"}) elif len(history) == 0 and image is None: messages.append({"role": "user", "content": message["text"]}) - + print(messages) image = Image.open(image).convert("RGB") text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("")] - input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0) + image_tensor = process_images(image, None, 
processor) + input_ids, attention_mask = process_text_input(text, tokenizer) stop_str = "<|im_end|>" keywords = [stop_str] stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - - image_tensor = ov_model.process_images([image], ov_model.config) generation_kwargs = dict( - input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=128, stopping_criteria=[stopping_criteria], temperature=0.01 + input_ids=input_ids, + attention_mask=attention_mask, + images=image_tensor, + streamer=streamer, + max_new_tokens=128, + stopping_criteria=[stopping_criteria], + temperature=0.01, ) thread = Thread(target=ov_model.generate, kwargs=generation_kwargs) thread.start() - + buffer = "" for new_text in streamer: buffer += new_text generated_text_without_prompt = buffer[:] - time.sleep(0.04) yield generated_text_without_prompt .. code:: ipython3 @@ -1202,11 +699,11 @@ Interactive demo if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/nano-llava-multimodal-chatbot/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(fn=bot_streaming) - + try: demo.launch(debug=False) except Exception: @@ -1219,17 +716,12 @@ Interactive demo .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - + To create a public link, set `share=True` in `launch()`. -.. 
code:: ipython3 - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.jpg b/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.jpg similarity index 100% rename from docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.jpg rename to docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.jpg diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png b/docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png similarity index 100% rename from docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_7_1.png rename to docs/notebooks/nano-llava-multimodal-chatbot-with-output_files/nano-llava-multimodal-chatbot-with-output_22_0.png diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index 06636ec17bf7e9..a34f72f5d8ff1e 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -2,12 +2,10 @@ Live Object Detection with OpenVINO™ ==================================== This notebook demonstrates live object detection with OpenVINO, using -the `SSDLite -MobileNetV2 `__ -from `Open Model -Zoo `__. Final part -of this notebook shows live inference results from a webcam. -Additionally, you can also upload a video file. +the `Ultralytics +YOLOv8 `__. Final part of +this notebook shows live inference results from a webcam. Additionally, +you can also upload a video file. **NOTE**: To use this notebook with a webcam, you need to run the notebook on a computer with a webcam. 
If you run the notebook on a @@ -25,13 +23,12 @@ Additionally, you can also upload a video file. - `The Model <#the-model>`__ - - `Download the Model <#download-the-model>`__ - - `Convert the Model <#convert-the-model>`__ + - `Download and convert the + Model <#download-and-convert-the-model>`__ - `Load the Model <#load-the-model>`__ - `Processing <#processing>`__ - - `Process Results <#process-results>`__ - `Main Processing Function <#main-processing-function>`__ - `Run <#run>`__ @@ -62,8 +59,8 @@ Install requirements .. code:: ipython3 - %pip install -q "openvino-dev>=2024.0.0" - %pip install -q tensorflow + %pip install -q "openvino>=2024.4.0" + %pip install -q "ultralytics==8.3.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q opencv-python requests tqdm # Fetch `notebook_utils` module @@ -79,11 +76,6 @@ Install requirements .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - magika 0.5.1 requires numpy<2.0,>=1.24; python_version >= "3.8" and python_version < "3.9", but you have numpy 1.23.5 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.23.5 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -103,17 +95,10 @@ Imports .. 
code:: ipython3 - import collections - import tarfile - import time from pathlib import Path - - import cv2 - import numpy as np - from IPython import display + import gc import openvino as ov - from openvino.tools.mo.front import tf as ov_tf_front - from openvino.tools import mo + from ultralytics import YOLO import notebook_utils as utils @@ -122,92 +107,53 @@ The Model -Download the Model -~~~~~~~~~~~~~~~~~~ - +Download and convert the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Use the ``download_file``, a function from the ``notebook_utils`` file. -It automatically creates a directory structure and downloads the -selected model. This step is skipped if the package is already -downloaded and unpacked. The chosen model comes from the public -directory, which means it must be converted into OpenVINO Intermediate -Representation (OpenVINO IR). - - **NOTE**: Using a model other than ``ssdlite_mobilenet_v2`` may - require different conversion parameters as well as pre- and - post-processing. .. code:: ipython3 # A directory where the model will be downloaded. 
- base_model_dir = Path("model") - - # The name of the model from Open Model Zoo - model_name = "ssdlite_mobilenet_v2" - archive_name = Path(f"{model_name}_coco_2018_05_09.tar.gz") - model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/{model_name}/{archive_name}" + # The name of the model + model_name = "yolov8n" - # Download the archive - downloaded_model_path = base_model_dir / archive_name - if not downloaded_model_path.exists(): - utils.download_file(model_url, downloaded_model_path.name, downloaded_model_path.parent) + det_model_path = Path(f"{model_name}_openvino_model/{model_name}.xml") - # Unpack the model - tf_model_path = base_model_dir / archive_name.with_suffix("").stem / "frozen_inference_graph.pb" - if not tf_model_path.exists(): - with tarfile.open(downloaded_model_path) as file: - file.extractall(base_model_dir) - + # export model to OpenVINO format using Ultralytics API + if not det_model_path.exists(): + pt_model = YOLO(f"{model_name}.pt") + pt_model.export(format="openvino", dynamic=True, half=True) + del pt_model + gc.collect() .. parsed-literal:: - model/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz: 0%| | 0.00/48.7M [00:00`__ -(``mo.convert_model`` function). If the model has been already -converted, this step is skipped. - -.. code:: ipython3 - - precision = "FP16" - # The output path for the conversion. 
- converted_model_path = Path("model") / f"{model_name}_{precision.lower()}.xml" - - # Convert it to IR if not previously converted - trans_config_path = Path(ov_tf_front.__file__).parent / "ssd_v2_support.json" - if not converted_model_path.exists(): - ov_model = mo.convert_model( - tf_model_path, - compress_to_fp16=(precision == "FP16"), - transformations_config=trans_config_path, - tensorflow_object_detection_api_pipeline_config=tf_model_path.parent / "pipeline.config", - reverse_input_channels=True, - ) - ov.save_model(ov_model, converted_model_path) - del ov_model + Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'... .. parsed-literal:: - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html + 100%|██████████| 6.25M/6.25M [00:00<00:00, 25.9MB/s] .. parsed-literal:: - [ WARNING ] The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if applicable) are kept. + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs + + PyTorch: starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) + + OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
+ OpenVINO: export success ✅ 1.3s, saved as 'yolov8n_openvino_model/' (6.4 MB) + + Export complete (1.5s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam + Predict: yolo predict task=detect model=yolov8n_openvino_model imgsz=640 half + Validate: yolo val task=detect model=yolov8n_openvino_model imgsz=640 data=coco.yaml half + Visualize: https://netron.app Load the Model @@ -242,208 +188,44 @@ best performance. For that purpose, just use ``AUTO``. .. code:: ipython3 - # Read the network and corresponding weights from a file. - model = core.read_model(model=converted_model_path) - # Compile the model for CPU (you can choose manually CPU, GPU etc.) - # or let the engine choose the best available device (AUTO). - compiled_model = core.compile_model(model=model, device_name=device.value) - - # Get the input and output nodes. - input_layer = compiled_model.input(0) - output_layer = compiled_model.output(0) - - # Get the input size. - height, width = list(input_layer.shape)[1:3] - -Input and output layers have the names of the input node and output node -respectively. In the case of SSDLite MobileNetV2, there is 1 input and 1 -output. - -.. code:: ipython3 - - input_layer.any_name, output_layer.any_name - - - - -.. parsed-literal:: - - ('image_tensor:0', 'detection_boxes:0') - - - -Processing ----------- - - - -Process Results -~~~~~~~~~~~~~~~ - - - -First, list all available classes and create colors for them. Then, in -the post-process stage, transform boxes with normalized coordinates -``[0, 1]`` into boxes with pixel coordinates ``[0, image_size_in_px]``. -Afterward, use `non-maximum -suppression `__ -to reject overlapping detections and those below the probability -threshold (0.5). Finally, draw boxes and labels inside them. - -.. 
code:: ipython3 - - # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ - classes = [ - "background", - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic light", - "fire hydrant", - "street sign", - "stop sign", - "parking meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "hat", - "backpack", - "umbrella", - "shoe", - "eye glasses", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports ball", - "kite", - "baseball bat", - "baseball glove", - "skateboard", - "surfboard", - "tennis racket", - "bottle", - "plate", - "wine glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot dog", - "pizza", - "donut", - "cake", - "chair", - "couch", - "potted plant", - "bed", - "mirror", - "dining table", - "window", - "desk", - "toilet", - "door", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "blender", - "book", - "clock", - "vase", - "scissors", - "teddy bear", - "hair drier", - "toothbrush", - "hair brush", - ] - - # Colors for the classes above (Rainbow Color Map). - colors = cv2.applyColorMap( - src=np.arange(0, 255, 255 / len(classes), dtype=np.float32).astype(np.uint8), - colormap=cv2.COLORMAP_RAINBOW, - ).squeeze() + core = ov.Core() - def process_results(frame, results, thresh=0.6): - # The size of the original frame. - h, w = frame.shape[:2] - # The 'results' variable is a [1, 1, 100, 7] tensor. - results = results.squeeze() - boxes = [] - labels = [] - scores = [] - for _, label, score, xmin, ymin, xmax, ymax in results: - # Create a box with pixels coordinates from the box with normalized coordinates [0,1]. 
- boxes.append(tuple(map(int, (xmin * w, ymin * h, (xmax - xmin) * w, (ymax - ymin) * h)))) - labels.append(int(label)) - scores.append(float(score)) + def load_model(det_model_path, device): + compiled_model = compile_model(det_model_path, device) + det_model = YOLO(det_model_path.parent, task="detect") - # Apply non-maximum suppression to get rid of many overlapping entities. - # See https://paperswithcode.com/method/non-maximum-suppression - # This algorithm returns indices of objects to keep. - indices = cv2.dnn.NMSBoxes(bboxes=boxes, scores=scores, score_threshold=thresh, nms_threshold=0.6) + if det_model.predictor is None: + custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"} # method defaults + args = {**det_model.overrides, **custom} + det_model.predictor = det_model._smart_load("predictor")(overrides=args, _callbacks=det_model.callbacks) + det_model.predictor.setup_model(model=det_model.model) - # If there are no boxes. - if len(indices) == 0: - return [] + det_model.predictor.model.ov_compiled_model = compiled_model + return det_model - # Filter detected objects. - return [(labels[idx], scores[idx], boxes[idx]) for idx in indices.flatten()] + def compile_model(det_model_path, device): + det_ov_model = core.read_model(det_model_path) - def draw_boxes(frame, boxes): - for label, score, box in boxes: - # Choose color for the label. - color = tuple(map(int, colors[label])) - # Draw a box. - x2 = box[0] + box[2] - y2 = box[1] + box[3] - cv2.rectangle(img=frame, pt1=box[:2], pt2=(x2, y2), color=color, thickness=3) + ov_config = {} + if device != "CPU": + det_ov_model.reshape({0: [1, 3, 640, 640]}) + if "GPU" in device or ("AUTO" in device and "GPU" in core.available_devices): + ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} + det_compiled_model = core.compile_model(det_ov_model, device, ov_config) + return det_compiled_model - # Draw a label name inside the box. 
- cv2.putText( - img=frame, - text=f"{classes[label]} {score:.2f}", - org=(box[0] + 10, box[1] + 30), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=frame.shape[1] / 1000, - color=color, - thickness=1, - lineType=cv2.LINE_AA, - ) - return frame + det_model = load_model(det_model_path, device.value) + + +.. parsed-literal:: + + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Loading yolov8n_openvino_model for OpenVINO inference... + Using OpenVINO LATENCY mode for batch=1 inference... + Main Processing Function ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -455,8 +237,18 @@ file. .. code:: ipython3 + from IPython import display + import cv2 + import numpy as np + + # Main processing function to run object detection. - def run_object_detection(source=0, flip=False, use_popup=False, skip_first_frames=0): + def run_object_detection( + source=0, + flip=False, + use_popup=False, + skip_first_frames=0, + ): player = None try: # Create a video player to play with target fps. @@ -467,7 +259,6 @@ file. title = "Press ESC to Exit" cv2.namedWindow(winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE) - processing_times = collections.deque() while True: # Grab the frame. frame = player.next() @@ -484,43 +275,10 @@ file. fy=scale, interpolation=cv2.INTER_AREA, ) - - # Resize the image and change dims to fit neural network input. - input_img = cv2.resize(src=frame, dsize=(width, height), interpolation=cv2.INTER_AREA) - # Create a batch of images (size = 1). - input_img = input_img[np.newaxis, ...] - - # Measure processing time. - - start_time = time.time() # Get the results. - results = compiled_model([input_img])[output_layer] - stop_time = time.time() - # Get poses from network results. - boxes = process_results(frame=frame, results=results) - - # Draw boxes on a frame. - frame = draw_boxes(frame=frame, boxes=boxes) - - processing_times.append(stop_time - start_time) - # Use processing times from last 200 frames. 
- if len(processing_times) > 200: - processing_times.popleft() - - _, f_width = frame.shape[:2] - # Mean processing time [ms]. - processing_time = np.mean(processing_times) * 1000 - fps = 1000 / processing_time - cv2.putText( - img=frame, - text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", - org=(20, 40), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=f_width / 1000, - color=(0, 0, 255), - thickness=1, - lineType=cv2.LINE_AA, - ) + input_image = np.array(frame) + detections = det_model(input_image, verbose=False) + frame = detections[0].plot() # Use this workaround if there is flickering. if use_popup: @@ -593,22 +351,10 @@ Run the object detection: -.. image:: object-detection-with-output_files/object-detection-with-output_19_0.png +.. image:: object-detection-with-output_files/object-detection-with-output_13_0.png .. parsed-literal:: Source ended - -References ----------- - - - -1. `SSDLite - MobileNetV2 `__ -2. `Open Model - Zoo `__ -3. `Non-Maximum - Suppression `__ diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_13_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_13_0.png new file mode 100644 index 00000000000000..1c548a0445636e --- /dev/null +++ b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_13_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3fca049d156843f0976e9386ece07d1846e92efcacc5c559de140e7733b67b3 +size 171752 diff --git a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png b/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png deleted file mode 100644 index 67d21a28902ef3..00000000000000 --- a/docs/notebooks/object-detection-with-output_files/object-detection-with-output_19_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:bae0d18e0fe38cee76b5220d145c96e7d6d38b087cba46fc71db126eb3a88bb8 -size 175079 diff --git a/docs/notebooks/oneformer-segmentation-with-output.rst b/docs/notebooks/oneformer-segmentation-with-output.rst index cc94c7dbff9047..f0a9e5703e1644 100644 --- a/docs/notebooks/oneformer-segmentation-with-output.rst +++ b/docs/notebooks/oneformer-segmentation-with-output.rst @@ -63,14 +63,7 @@ Install required libraries .. code:: ipython3 - import platform - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.7.0" "gradio>=4.19" "torch>=2.1" scipy ipywidgets Pillow tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.7.0" "gradio>=4.19" "torch>=2.1" "matplotlib>=3.4" scipy ipywidgets Pillow tqdm Prepare the environment ----------------------- diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index 789fdeb53b1a45..b2b4c8c0f04fdd 100644 --- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -201,7 +201,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -250,7 +250,7 @@ points to the filename of an ONNX model. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') @@ -310,7 +310,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') @@ -354,7 +354,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') @@ -407,7 +407,7 @@ It is pre-trained model optimized to work with TensorFlow Lite. .. parsed-literal:: - Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.1) + Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.3) .. code:: ipython3 @@ -497,7 +497,7 @@ Information about the inputs and outputs of the model are in .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -703,7 +703,7 @@ produced data as values. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -892,7 +892,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') @@ -1044,7 +1044,7 @@ the cache. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -1092,5 +1092,5 @@ measure the time it takes now. .. parsed-literal:: - Loading the network to the AUTO device took 0.07 seconds. + Loading the network to the AUTO device took 0.08 seconds. 
diff --git a/docs/notebooks/openvoice-with-output.rst b/docs/notebooks/openvoice-with-output.rst index 4e4a53150f7504..2ee11fcded84dc 100644 --- a/docs/notebooks/openvoice-with-output.rst +++ b/docs/notebooks/openvoice-with-output.rst @@ -99,14 +99,12 @@ Clone repository and install requirements Cloning into 'OpenVoice'... remote: Enumerating objects: 438, done. - remote: Counting objects: 100% (238/238), done. - remote: Compressing objects: 100% (113/113), done. - remote: Total 438 (delta 178), reused 127 (delta 125), pack-reused 200 (from 1) - Receiving objects: 100% (438/438), 3.82 MiB | 16.31 MiB/s, done. - Resolving deltas: 100% (221/221), done. + remote: Total 438 (delta 0), reused 0 (delta 0), pack-reused 438 (from 1) + Receiving objects: 100% (438/438), 3.84 MiB | 21.51 MiB/s, done. + Resolving deltas: 100% (207/207), done. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -245,9 +243,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. 
This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -261,9 +259,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -413,50 +411,50 @@ documentation 0 No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? 
Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 - %5559 : Float(1, 192, 151, strides=[28992, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 153, strides=[29376, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. 
of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 38912]) != torch.Size([1, 1, 39424]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 39424]) != torch.Size([1, 1, 38656]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 152, 43]) != torch.Size([1, 1, 154, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 154, 43]) != torch.Size([1, 1, 151, 43]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 152]) != torch.Size([1, 1, 154]). 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 154]) != torch.Size([1, 1, 151]). _check_trace( - 2024-10-08 03:16:11.904034: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:13:33.268258: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (199 / 199) │ 100% (199 / 199) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (199 / 199) │ 100% (199 / 199) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -479,27 +477,27 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. 
A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -716,7 +714,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -871,7 +869,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -889,7 +887,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1078,7 +1076,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index 59f72ff2fd84c3..7dae2290312e68 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -9,16 +9,9 @@ which shows only text detection. The `horizontal-text-detection-0001 `__ and -`text-recognition-resnet `__ +`text-recognition-0014 `__ models are used together for text detection and then text recognition. -In this tutorial, Open Model Zoo tools including Model Downloader, Model -Converter and Info Dumper are used to download and convert the models -from `Open Model -Zoo `__. For more -information, refer to the -`model-tools `__ tutorial. - **Table of contents:** @@ -26,7 +19,6 @@ information, refer to the - `Imports <#imports>`__ - `Settings <#settings>`__ - `Download Models <#download-models>`__ -- `Convert Models <#convert-models>`__ - `Select inference device <#select-inference-device>`__ - `Object Detection <#object-detection>`__ @@ -61,24 +53,12 @@ Guide =2024.0.0" "onnx<1.16.2" torch torchvision pillow opencv-python --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + # Install openvino package + %pip install -q "openvino>=2024.4.0" pillow opencv-python "matplotlib>=3.4" .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. 
- mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - torchaudio 2.4.1+cpu requires torch==2.4.1, but you have torch 2.2.2+cpu which is incompatible. - Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -120,7 +100,7 @@ Settings model_dir = Path("model") precision = "FP16" detection_model = "horizontal-text-detection-0001" - recognition_model = "text-recognition-resnet-fc" + recognition_model = "text-recognition-0014" model_dir.mkdir(exist_ok=True) @@ -129,274 +109,35 @@ Download Models -The next cells will run Model Downloader to download the detection and -recognition models. If the models have been downloaded before, they will -not be downloaded again. +The next cells will download the detection and recognition models. If +the models have been downloaded before, they will not be downloaded +again. .. 
code:: ipython3 - download_command = ( - f"omz_downloader --name {detection_model},{recognition_model} --output_dir {model_dir} --cache_dir {model_dir} --precision {precision} --num_attempts 5" - ) - display(Markdown(f"Download command: `{download_command}`")) - display(Markdown(f"Downloading {detection_model}, {recognition_model}...")) - !$download_command - display(Markdown(f"Finished downloading {detection_model}, {recognition_model}.")) - - detection_model_path = (model_dir / "intel/horizontal-text-detection-0001" / precision / detection_model).with_suffix(".xml") - recognition_model_path = (model_dir / "public/text-recognition-resnet-fc" / precision / recognition_model).with_suffix(".xml") - - - -Download command: -``omz_downloader --name horizontal-text-detection-0001,text-recognition-resnet-fc --output_dir model --cache_dir model --precision FP16 --num_attempts 5`` - - - -Downloading horizontal-text-detection-0001, text-recognition-resnet-fc… - - -.. parsed-literal:: - - ################|| Downloading horizontal-text-detection-0001 ||################ - - ========== Downloading model/intel/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.xml - - - ========== Downloading model/intel/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.bin - - - ################|| Downloading text-recognition-resnet-fc ||################ - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/model.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/weight_init.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/heads/__init__.py - - - ========== Downloading 
model/public/text-recognition-resnet-fc/vedastr/models/heads/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/heads/fc_head.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/heads/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/body.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/component.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/__init__.py - - - ========== Downloading 
model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/bricks.py - + from notebook_utils import download_ir_model - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/builder.py + detection_model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{detection_model}/{precision}/{detection_model}.xml" + recognition_model_url = ( + f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{recognition_model}/{precision}/{recognition_model}.xml" + ) - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/resnet.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/builder.py - - - ========== Downloading 
model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/builder.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/conv_module.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/fc_module.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/norm.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/models/utils/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/__init__.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/common.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/registry.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/utils/config.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth - - - ========== Downloading model/public/text-recognition-resnet-fc/vedastr/addict-2.4.0-py3-none-any.whl - - - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/heads/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/sequences/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/component.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/__init__.py - ========== Replacing 
text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/decoders/bricks/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/enhance_modules/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/utils/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/__init__.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/utils/config.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/resnet.py - ========== Replacing text in model/public/text-recognition-resnet-fc/vedastr/models/bodies/feature_extractors/encoders/backbones/resnet.py - ========== Unpacking model/public/text-recognition-resnet-fc/vedastr/addict-2.4.0-py3-none-any.whl - - - - -Finished downloading horizontal-text-detection-0001, -text-recognition-resnet-fc. - - -.. code:: ipython3 - - ### The text-recognition-resnet-fc model consists of many files. All filenames are printed in - ### the output of Model Downloader. Uncomment the next two lines to show this output. 
+ detection_model_path = download_ir_model(detection_model_url, model_dir / detection_model / precision) - # for line in download_result: - # print(line) - -Convert Models --------------- - + recognition_model_path = download_ir_model(recognition_model_url, model_dir / recognition_model / precision) -The downloaded detection model is an Intel model, which is already in -OpenVINO Intermediate Representation (OpenVINO IR) format. The text -recognition model is a public model which needs to be converted to -OpenVINO IR. Since this model was downloaded from Open Model Zoo, use -Model Converter to convert the model to OpenVINO IR format. - -The output of Model Converter will be displayed. When the conversion is -successful, the last lines of output will include -``[ SUCCESS ] Generated IR version 11 model.`` - -.. code:: ipython3 - - convert_command = f"omz_converter --name {recognition_model} --precisions {precision} --download_dir {model_dir} --output_dir {model_dir}" - display(Markdown(f"Convert command: `{convert_command}`")) - display(Markdown(f"Converting {recognition_model}...")) - ! $convert_command - - - -Convert command: -``omz_converter --name text-recognition-resnet-fc --precisions FP16 --download_dir model --output_dir model`` +.. parsed-literal:: + model/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.bin: 0%| | 0.00/3.70M [00:… -Converting text-recognition-resnet-fc… .. 
parsed-literal:: - ========== Converting text-recognition-resnet-fc to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx - - ONNX check passed successfully. - - ========== Converting text-recognition-resnet-fc to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True - - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. 
- In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. - Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html - [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin - + model/text-recognition-0014/FP16/text-recognition-0014.bin: 0%| | 0.00/17.4M [00:00=2023.1.0" opencv-python tqdm - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.1.0" opencv-python tqdm "matplotlib>=3.4" %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 @@ -103,7 +97,6 @@ Settings Note: you may need to restart the kernel to use updated packages. 
Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -360,7 +353,7 @@ for mean/scale normalization. .. parsed-literal:: - + @@ -391,7 +384,7 @@ may be specified is input data .. parsed-literal:: - + @@ -429,7 +422,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -643,6 +636,6 @@ Compare performance .. parsed-literal:: - IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0148 seconds per image, FPS: 67.77 - IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0142 seconds per image, FPS: 70.46 + IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0153 seconds per image, FPS: 65.39 + IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0166 seconds per image, FPS: 60.23 diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index 6e0bcc263ed873..9f7510cd5efe96 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -75,6 +75,8 @@ Guide =1.22, but you have numpy 1.24.4 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -207,7 +209,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-no… .. parsed-literal:: @@ -253,7 +255,7 @@ Download the Model for Text **Recognition** .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-no… .. parsed-literal:: diff --git a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png index e6377775fd7b9b..38a0d5d593351b 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png +++ b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:972e260b6c702c732ac3d1cb7cb2d8e38c9fe8ea0bd7b8e783e74f5d5c8cc6f5 -size 591296 +oid sha256:ac7efd85f2c50b0a189dbf00c0cd2252f362e6469cd014d8f255c53172152c3d +size 591373 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output.rst b/docs/notebooks/paddle-to-openvino-classification-with-output.rst index e3bb27d374c1aa..25feb9293ee93a 100644 --- a/docs/notebooks/paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/paddle-to-openvino-classification-with-output.rst @@ -63,7 +63,7 @@ Imports else: %pip install -q "paddlepaddle>=2.5.1" %pip install -q "paddleclas>=2.5.2" --no-deps - %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm + %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" # Install openvino package %pip install -q "openvino>=2023.1.0" @@ -89,11 +89,11 @@ Imports .. parsed-literal:: - --2024-10-08 03:19:19-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + --2024-11-05 02:15:59-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 
10.241.208.166 Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. Proxy request sent, awaiting response... 404 Not Found - 2024-10-08 03:19:19 ERROR 404: Not Found. + 2024-11-05 02:16:00 ERROR 404: Not Found. dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory @@ -124,8 +124,8 @@ Imports .. parsed-literal:: - 2024-10-08 03:19:21 INFO: Loading faiss with AVX512 support. - 2024-10-08 03:19:21 INFO: Successfully loaded faiss with AVX512 support. + 2024-11-05 02:16:02 INFO: Loading faiss with AVX512 support. + 2024-11-05 02:16:02 INFO: Successfully loaded faiss with AVX512 support. Settings @@ -209,7 +209,7 @@ inference on that image, and then show the top three prediction results. .. parsed-literal:: - [2024/10/08 03:19:44] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. + [2024/11/05 02:16:41] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. Labrador retriever, 0.75138 German short-haired pointer, 0.02373 Great Dane, 0.01848 @@ -275,7 +275,7 @@ clipping values. .. parsed-literal:: - 2024-10-08 03:19:45 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). + 2024-11-05 02:16:42 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). .. parsed-literal:: @@ -287,7 +287,7 @@ clipping values. .. parsed-literal:: - + @@ -462,7 +462,7 @@ Note that many optimizations are possible to improve the performance. .. parsed-literal:: - PaddlePaddle model on CPU: 0.0077 seconds per image, FPS: 130.66 + PaddlePaddle model on CPU: 0.0074 seconds per image, FPS: 134.37 PaddlePaddle result: Labrador retriever, 0.75138 @@ -523,7 +523,7 @@ select device from dropdown list for running inference using OpenVINO .. 
parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 380.65 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0027 seconds per image, FPS: 373.31 OpenVINO result: Labrador retriever, 0.74909 diff --git a/docs/notebooks/paint-by-example-with-output.rst b/docs/notebooks/paint-by-example-with-output.rst index ed3c0d2e0ab1d7..2f1371652c5750 100644 --- a/docs/notebooks/paint-by-example-with-output.rst +++ b/docs/notebooks/paint-by-example-with-output.rst @@ -58,7 +58,7 @@ This is the overall flow of the application: .. code:: ipython3 %pip install -q "torch>=2.1" torchvision --extra-index-url "https://download.pytorch.org/whl/cpu" - %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm + %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" "matplotlib>=3.4" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm Download the model from `HuggingFace Paint-by-Example `__. diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst index 25e4d4bed03a7d..323959aa17e8ef 100644 --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst @@ -9,7 +9,7 @@ with synthetic annotations `__ by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. -|image0| +.. image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w Text-to-speech models trained on large-scale datasets have demonstrated impressive in-context learning capabilities and naturalness. However, @@ -53,8 +53,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. 
|image0| image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w - Prerequisites ------------- @@ -69,24 +67,6 @@ Prerequisites %pip install -q "openvino>=2024.2.0" %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" --extra-index-url https://download.pytorch.org/whl/cpu - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - googleapis-common-protos 1.65.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.20.2, but you have protobuf 3.19.6 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.4.1+cpu which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - onnx 1.16.1 requires protobuf>=3.20.2, but you have protobuf 3.19.6 which is incompatible. - paddlepaddle 2.6.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.19.6 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.19.6 which is incompatible. - tensorflow-datasets 4.9.2 requires protobuf>=3.20, but you have protobuf 3.19.6 which is incompatible. - tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 3.19.6 which is incompatible. - torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1+cpu which is incompatible. - visualdl 2.5.3 requires protobuf>=3.20.0, but you have protobuf 3.19.6 which is incompatible. - Note: you may need to restart the kernel to use updated packages. 
- - Load the original model and inference ------------------------------------- @@ -115,19 +95,6 @@ Load the original model and inference audio_arr = generation.cpu().numpy().squeeze() sf.write("parler_tts_out.wav", audio_arr, model.config.sampling_rate) - -.. parsed-literal:: - - 2024-10-08 03:20:24.075446: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:20:24.108997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - Flash attention 2 is not installed - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. - WeightNorm.apply(module, name, dim) - You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers - The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. - - .. code:: ipython3 import IPython.display as ipd @@ -141,10 +108,10 @@ Load the original model and inference - + @@ -192,13 +159,6 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. text_encoder_ov_model = convert(model.text_encoder, TEXT_ENCODER_OV_PATH, example_input) - -.. 
parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - - The Decoder Model performs in generation pipeline and we can separate it into two stage. In the first stage the model generates ``past_key_values`` into output for the second stage. In the second @@ -233,17 +193,6 @@ stage the model produces tokens during several runs. decoder_1_ov_model = convert(DecoderStage1Wrapper(model.decoder.model.decoder), DECODER_STAGE_1_OV_PATH, example_input) - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:253: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if seq_len > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1599: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if sequence_length != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - - .. code:: ipython3 DECODER_STAGE_2_OV_PATH = Path("models/decoder_stage_2_ir.xml") @@ -309,7 +258,7 @@ Select device from dropdown list for running inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') @@ -411,10 +360,10 @@ and run inference. - + @@ -457,29 +406,13 @@ Interactive inference demo = make_demo(fn=infer) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(share=True, debug=False) + demo.queue().launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB - - To create a public link, set `share=True` in `launch()`. - - - - - - - - .. 
code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/person-tracking-with-output.rst b/docs/notebooks/person-tracking-with-output.rst index b559c680b5503e..653a9b376edf7e 100644 --- a/docs/notebooks/person-tracking-with-output.rst +++ b/docs/notebooks/person-tracking-with-output.rst @@ -128,22 +128,14 @@ Guide =2024.0.0" - %pip install -q opencv-python requests scipy tqdm - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2024.0.0" + %pip install -q opencv-python requests scipy tqdm "matplotlib>=3.4" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. Imports @@ -197,16 +189,6 @@ We will use pre-trained models from OpenVINO’s `Open Model Zoo `__ to start the test. -Use ``omz_downloader``, which is a command-line tool from the -``openvino-dev`` package. It automatically creates a directory structure -and downloads the selected model. This step is skipped if the model is -already downloaded. The selected model comes from the public directory, -which means it must be converted into OpenVINO Intermediate -Representation (OpenVINO IR). - - **NOTE**: Using a model outside the list can require different pre- - and post-processing. - In this case, `person detection model `__ is deployed to detect the person in each frame of the video, and @@ -215,60 +197,39 @@ model `__, -``person-reidentification-retail-xxx`` from `Reidentification Models -list `__), -replace the name of the model in the code below. - .. code:: ipython3 + from notebook_utils import download_ir_model + # A directory where the model will be downloaded. 
base_model_dir = "model" precision = "FP16" # The name of the model from Open Model Zoo detection_model_name = "person-detection-0202" - download_command = ( - f"omz_downloader " f"--name {detection_model_name} " f"--precisions {precision} " f"--output_dir {base_model_dir} " f"--cache_dir {base_model_dir}" - ) - ! $download_command - detection_model_path = f"model/intel/{detection_model_name}/{precision}/{detection_model_name}.xml" + download_det_model_url = ( + f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{detection_model_name}/{precision}/{detection_model_name}.xml" + ) + detection_model_path = download_ir_model(download_det_model_url, Path(base_model_dir) / detection_model_name / precision) reidentification_model_name = "person-reidentification-retail-0287" + download_reid_model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{reidentification_model_name}/{precision}/{reidentification_model_name}.xml" - download_command = ( - f"omz_downloader " f"--name {reidentification_model_name} " f"--precisions {precision} " f"--output_dir {base_model_dir} " f"--cache_dir {base_model_dir}" - ) - ! $download_command + reidentification_model_path = download_ir_model(download_reid_model_url, Path(base_model_dir) / reidentification_model_name / precision) - reidentification_model_path = f"model/intel/{reidentification_model_name}/{precision}/{reidentification_model_name}.xml" .. parsed-literal:: - ################|| Downloading person-detection-0202 ||################ - - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.xml - - - ========== Downloading model/intel/person-detection-0202/FP16/person-detection-0202.bin - + model/person-detection-0202/FP16/person-detection-0202.bin: 0%| | 0.00/3.47M [00:00=3.9.2, but you have protobuf 5.28.2 which is incompatible. - open-clip-torch 2.22.0 requires protobuf<4, but you have protobuf 5.28.2 which is incompatible. 
- tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.28.2 which is incompatible. - tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 5.28.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -89,10 +84,10 @@ Select Model -The tutorial supports the following models from Phi-3 model family: - -`Phi-3.5-vision-instruct `__ -- -`Phi-3-vision-128k-instruct `__ +The tutorial supports the following models from Phi-3 model family: + +- `Phi-3.5-vision-instruct `__ +- `Phi-3-vision-128k-instruct `__ You can select one from the provided options below. @@ -265,10 +260,10 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:85: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
op1 = operator(\*args, \*\*kwargs) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:690: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:702: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -391,13 +401,13 @@ documentation =3.9.2, but you have protobuf 3.20.3 which is incompatible. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. 
- supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. + parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -200,10 +197,10 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-10-08 03:26:26.755777: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:26:26.790097: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:22:09.727876: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:22:09.761823: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:26:27.482079: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:22:10.482979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -224,15 +221,6 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. 
parsed-literal:: Loading PhotoMaker components [1] id_encoder from [/opt/home/k8sworker/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/f68f8e6309bf213d28d68230abff0ccc92de9f30]... - - -.. parsed-literal:: - - The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. - - -.. parsed-literal:: - Loading PhotoMaker components [2] lora_weights from [/opt/home/k8sworker/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/f68f8e6309bf213d28d68230abff0ccc92de9f30] @@ -401,20 +389,23 @@ output(text embeddings) which will be the input for U-Net model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (151 / 151) │ 100% (151 / 151) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (151 / 151) │ 100% (151 / 151) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -478,20 +469,20 @@ sequence of latent text embeddings. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (73 / 73) │ 100% (73 / 73) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (73 / 73) │ 100% (73 / 73) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -510,11 +501,11 @@ sequence of latent text embeddings. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (194 / 194) │ 100% (194 / 194) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (194 / 194) │ 100% (194 / 194) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -584,26 +575,26 @@ original Stable Diffusion XL model. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (794 / 794) │ 100% (794 / 794) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (794 / 794) │ 100% (794 / 794) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -659,11 +650,11 @@ VAE decoder. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index 631e392394cfed..c1c9a4b4e8ec57 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,10 +118,10 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-10-08 03:34:29.221746: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 03:34:29.256826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 02:30:04.644117: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 02:30:04.680089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 03:34:29.928193: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 02:30:05.360275: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -130,12 +130,6 @@ directly in latent space, achieving super fast inference with few steps. Loading pipeline components...: 0%| | 0/5 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 + Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: + ['caption_projection.y_embedding'] @@ -233,8 +229,9 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4664: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Convert transformer @@ -275,11 +272,11 @@ Convert transformer .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.shape[0] < batch_size * head_size: @@ -304,9 +301,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -452,7 +449,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. 
Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -567,7 +564,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -617,13 +614,12 @@ layers and (2) activations of other layers. The steps are the following: INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino INFO:nncf:1 ignored nodes were found by types in the NNCFGraph - INFO:nncf:1 ignored nodes were found by types in the NNCFGraph INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (290 / 290) │ 100% (290 / 290) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (290 / 290) │ 100% (290 / 290) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1551,13 +1547,13 @@ applied to footprint reduction. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 3% (3 / 194) │ 0% (0 / 191) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 97% (191 / 194) │ 100% (191 / 191) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 3% (3 / 194) │ 0% (0 / 191) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 97% (191 / 194) │ 100% (191 / 191) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1576,13 +1572,13 @@ applied to footprint reduction. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 98% (37 / 40) │ 0% (0 / 3) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_sym │ 2% (3 / 40) │ 100% (3 / 3) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 98% (37 / 40) │ 0% (0 / 3) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 2% (3 / 40) │ 100% (3 / 3) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -1626,16 +1622,16 @@ pipelines. Loading pipeline components...: 0%| | 0/5 [00:00=2.1" torchvision "pillow" "tqdm" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/llava_model" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "nncf>=2.13.0" "openvino>=2024.4" %pip install -q "transformers>=4.45.0" --extra-index-url https://download.pytorch.org/whl/cpu @@ -70,10 +70,6 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. 
Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torch==1.13.1, but you have torch 2.2.2+cpu which is incompatible. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - parler-tts 0.2 requires transformers<=4.43.3,>=4.43.0, but you have transformers 4.45.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -157,27 +153,21 @@ documentation True + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:458: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) - Using framework PyTorch: 2.2.2+cpu [ WARNING ] Unexpectedly found already patched module language_model.model.embed_tokens while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. [ WARNING ] Unexpectedly found already patched module language_model.model.layers.0.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
@@ -460,48 +450,42 @@ documentation False [ WARNING ] Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. Export model to OpenVINO directly failed with: - Config dummy inputs are not a subset of the model inputs: {'input'} vs {'args', 'kwargs'}. + Config dummy inputs are not a subset of the model inputs: {'input'} vs {'kwargs', 'args'}. Model will be exported to ONNX - Using framework PyTorch: 2.2.2+cpu - Overriding 1 configuration item(s) - - use_cache -> False - Saving external data to one file... + Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 6% (1 / 281) │ 0% (0 / 280) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 94% (280 / 281) │ 100% (280 / 280) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:07 • 0:00:00 + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (1 / 281) │ 0% (0 / 280) │ + 
├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (280 / 281) │ 100% (280 / 280) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:12 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 6% (3 / 172) │ 0% (0 / 169) │ - ├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ int4_asym │ 94% (169 / 172) │ 100% (169 / 169) │ - ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 6% (3 / 172) │ 0% (0 / 169) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 94% (169 / 172) │ 100% (169 / 169) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:12 • 0:00:00 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Weight compression mode │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ int8_asym │ 100% (1 / 1) │ 0% (0 / 0) │ - 
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:02 • 0:00:00 + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (1 / 1) │ 0% (0 / 0) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:03 • 0:00:00 Run model inference @@ -561,13 +545,10 @@ Intel can be found in .. parsed-literal:: - 2024-10-08 04:29:37.124362: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 04:29:37.158372: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 03:26:01.941542: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 03:26:01.977558: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-10-08 04:29:37.816800: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Compiling the Language model to CPU ... - Compiling the Text embeddings model to CPU ... - Compiling the vision_embeddings to CPU ... + 2024-11-05 03:26:02.650242: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. code:: ipython3 @@ -611,7 +592,7 @@ Intel can be found in .. parsed-literal:: - The unusual aspect of this image is the presence of a cat and a dog lying together peacefully inside a cardboard box. This is not a common sight, as cats and dogs are often perceived as being natural enemies or at least not inclined to share spaces closely. The image portrays a harmonious and playful interaction between the two animals, which challenges typical stereotypes about their relationship. + The unusual aspect of this image is that the cat is lying inside a cardboard box, which is not a typical setting for a cat. Cats are often known for their affinity for boxes, but it is still considered unusual to see a cat comfortably resting inside a box in a living room setting. The cat appears relaxed and content, which adds to the charm of the scene. The presence of a sofa in the background further emphasizes the domestic and cozy atmosphere of the image. Interactive demo @@ -638,6 +619,8 @@ Interactive demo Running on local URL: http://127.0.0.1:7860 + Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB + To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png index acdee31b5fc986..3f1a050bb6d19d 100644 --- a/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png +++ b/docs/notebooks/pose-estimation-with-output_files/pose-estimation-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c27bcedcb93b8775d73cdf416666ce2517ea493b556386f5f7ccd35d53ed15c3 -size 108140 +oid sha256:7d8df8beaa1c9d21dbe44f77fdb5780dd8df21cb1dfa88b9ff82e75b4d524e88 +size 108054 diff --git a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst index 906f8bbc4434de..3bc3566c94068e 100644 --- a/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst +++ b/docs/notebooks/pytorch-onnx-to-openvino-with-output.rst @@ -557,9 +557,9 @@ performance. .. parsed-literal:: - PyTorch model on CPU: 0.039 seconds per image, FPS: 25.34 - ONNX model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 56.97 - OpenVINO IR model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 55.62 + PyTorch model on CPU: 0.042 seconds per image, FPS: 23.97 + ONNX model in OpenVINO Runtime/AUTO: 0.018 seconds per image, FPS: 54.95 + OpenVINO IR model in OpenVINO Runtime/AUTO: 0.028 seconds per image, FPS: 36.18 **Show Device Information** @@ -588,7 +588,7 @@ References - `Torchvision `__ - `Pytorch ONNX Documentation `__ -- `PIP install openvino-dev `__ +- `PIP install openvino `__ - `OpenVINO ONNX support `__ - `Model Conversion API diff --git a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst index d2b770f4402052..87bb315242e0e5 100644 --- a/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst +++ 
b/docs/notebooks/pytorch-post-training-quantization-nncf-with-output.rst @@ -159,7 +159,7 @@ Settings .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') @@ -449,15 +449,15 @@ I. Evaluate the loaded model .. parsed-literal:: - Test: [ 0/79] Time 0.290 (0.290) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) - Test: [10/79] Time 0.227 (0.240) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) - Test: [20/79] Time 0.232 (0.240) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) - Test: [30/79] Time 0.243 (0.239) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) - Test: [40/79] Time 0.251 (0.240) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) - Test: [50/79] Time 0.234 (0.240) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) - Test: [60/79] Time 0.242 (0.242) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) - Test: [70/79] Time 0.240 (0.242) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) - * Acc@1 60.740 Acc@5 83.960 Total time: 18.879 + Test: [ 0/79] Time 0.294 (0.294) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) + Test: [10/79] Time 0.266 (0.262) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) + Test: [20/79] Time 0.316 (0.283) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) + Test: [30/79] Time 0.250 (0.277) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) + Test: [40/79] Time 0.253 (0.271) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) + Test: [50/79] Time 0.251 (0.268) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) + Test: [60/79] Time 0.252 (0.265) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) + Test: [70/79] Time 0.255 (0.264) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) + * Acc@1 60.740 Acc@5 83.960 Total time: 20.519 Test accuracy of FP32 model: 60.740 @@ -500,10 +500,10 @@ Guide `__ - -**Table of contents:** 
+`NNCF `__ #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ @@ -79,11 +78,11 @@ Prerequisites from pathlib import Path import requests - + if not Path("ov_qwen2_audio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/qwen2-audio/ov_qwen2_audio_helper.py") open("ov_qwen2_audio_helper.py", "w").write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -212,13 +211,13 @@ documentation target_length: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) .. parsed-literal:: @@ -342,13 +350,13 @@ documentation `__ .. code:: ipython3 from ov_qwen2_audio_helper import OVQwen2AudioForConditionalGeneration - + # Uncomment below lines to see the model inference class code # OVQwen2AudioForConditionalGeneration?? .. 
code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="AUTO", exclude=["NPU"]) - + device @@ -423,20 +431,20 @@ Run model inference from transformers import AutoProcessor, TextStreamer import librosa import IPython.display as ipd - - + + processor = AutoProcessor.from_pretrained(model_dir) - + audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac" audio_chat_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav" audio_file = Path(audio_url.split("/")[-1]) audio_chat_file = Path(audio_chat_url.split("/")[-1]) - + if not audio_file.exists(): r = requests.get(audio_url) with audio_file.open("wb") as f: f.write(r.content) - + if not audio_chat_file.exists(): r = requests.get(audio_chat_url) with audio_chat_file.open("wb") as f: @@ -458,14 +466,14 @@ Voice chat ], }, ] - + text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False) audios = [librosa.load(audio_chat_file, sr=processor.feature_extractor.sampling_rate)[0]] - + inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True) display(ipd.Audio(audio_chat_file)) print("Answer:") - + generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)) @@ -477,7 +485,7 @@ Voice chat .. raw:: html - + - + .. parsed-literal:: @@ -367,10 +353,10 @@ can be found in the Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. 
+ - -Download and convert model to OpenVINO IR via Optimum Intel CLI ---------------------------------------------------------------- +Convert model to OpenVINO IR via Optimum Intel CLI +-------------------------------------------------- @@ -401,60 +387,13 @@ documentation `__. import logging import nncf - import os - from IPython.display import display, Markdown + from cmd_helper import optimum_cli nncf.set_log_level(logging.ERROR) model_path = Path(model_id.value.split("/")[1]) - export_command = f"optimum-cli export openvino --model {model_id.value} --library transformers --task automatic-speech-recognition-with-past --framework pt {str(model_path)}" - - display(Markdown("**Export command:**")) - display(Markdown(f"`{export_command}`")) - - exit_code = os.system(export_command) - if exit_code != 0: - raise Exception("Failed to load and convert model!") - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - - -**Export command:** - - - -``optimum-cli export openvino --model openai/whisper-tiny --library transformers --task automatic-speech-recognition-with-past --framework pt whisper-tiny`` - - -.. 
parsed-literal:: - - 2024-10-08 06:43:10.758697: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Moving the following attributes in the config to the generation config: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> False - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:1071: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input_features.shape[-1] != expected_seq_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:388: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/whisper/modeling_whisper.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if sequence_length != 1: - Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:447: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. - or len(self.key_cache[layer_idx]) == 0 # the layer has no cache - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
- elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors - + optimum_cli(model_id.value, model_path) + print(f"✅ {model_id.value} model converted and can be found in {model_path}") Run inference OpenVINO model with WhisperPipeline ------------------------------------------------- @@ -496,11 +435,9 @@ and put array as input. .. code:: ipython3 - sample = copy.deepcopy(en_raw_speech) + genai_result = ov_pipe.generate(en_raw_speech) - genai_result = ov_pipe.generate(sample) - - display(ipd.Audio(sample, rate=samplerate)) + display(ipd.Audio(en_raw_speech, rate=samplerate)) print(f"Result: {genai_result}") @@ -509,16 +446,61 @@ and put array as input. - + .. parsed-literal:: - Result: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. + Result: Colonel Jessif, did you order the code rate? You don't have to answer that question. I'll answer the question. You want answers? I think I'm entitled. You want answers? I want the truth. You can't handle the truth. + + +Whisper could provide a phrase-level timestamps for audio. Let’s try +this scenario, we will specify ``return_timestamps=True`` for +``generate`` method. + +``generate`` method with ``return_timestamps`` set to ``True`` will +return ``chunks``, which contain attributes: ``text``, ``start_ts`` and +``end_ts`` in seconds. + +.. code:: ipython3 + + genai_result_timestamps = ov_pipe.generate(en_raw_speech, return_timestamps=True) + + for segment in genai_result_timestamps.chunks: + print(f"{segment.start_ts}sec. ---> {segment.end_ts}sec.") + print(f"{segment.text}\n") + +.. parsed-literal:: + + 0.0sec. ---> 3.0sec. + Colonel Jessif, did you order the code rate? + + 3.0sec. ---> 4.5sec. + You don't have to answer that question. + + 4.5sec. ---> 6.5sec. + I'll answer the question. + + 6.5sec. ---> 8.0sec. + You want answers? + + 8.0sec. ---> 9.0sec. + I think I'm entitled. + + 9.0sec. ---> 10.0sec. + You want answers? 
+ + 10.0sec. ---> 11.0sec. + I want the truth. + + 11.0sec. ---> 13.0sec. + You can't handle the truth. + + Let’s see how to work the ``translate`` task. It supports for multilingual models only. For that case we will specify ``language`` and @@ -542,7 +524,7 @@ format. } if model_type.value == "Multilingual models": - sample = copy.deepcopy(mls_example["audio"]) + sample = mls_example["audio"] genai_result_ml = ov_pipe.generate(sample["array"], max_new_tokens=100, task="translate", language=languages_genai[SAMPLE_LANG.value]) @@ -559,7 +541,7 @@ format. Your browser does not support the audio element. - + .. parsed-literal:: @@ -567,7 +549,7 @@ format. Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale. Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class. - + Compare performance PyTorch vs OpenVINO --------------------------------------- @@ -621,10 +603,10 @@ Compare performance PyTorch vs OpenVINO .. parsed-literal:: - Mean torch openai/whisper-tiny generation time: 0.273s - Mean openvino openai/whisper-tiny generation time: 0.166s - Performance openai/whisper-tiny openvino speedup: 1.650 - + Mean torch openai/whisper-tiny generation time: 0.564s + Mean openvino openai/whisper-tiny generation time: 0.311s + Performance openai/whisper-tiny openvino speedup: 1.815 + Quantization ------------ @@ -706,13 +688,7 @@ Below is an example of the whisper-tiny model Like the original PyTorch model, the OpenVINO model is also compatible with HuggingFace `pipeline `__ -interface for ``automatic-speech-recognition``. Pipeline can be used for -long audio transcription. Distil-Whisper uses a chunked algorithm to -transcribe long-form audio files. 
In practice, this chunked long-form -algorithm is 9x faster than the sequential algorithm proposed by OpenAI -in the Whisper paper. To enable chunking, pass the chunk_length_s -parameter to the pipeline. For Distil-Whisper, a chunk length of 15 -seconds is optimal. To activate batching, pass the argument batch_size. +interface for ``automatic-speech-recognition``. .. code:: ipython3 @@ -721,14 +697,6 @@ seconds is optimal. To activate batching, pass the argument batch_size. ov_model = OVModelForSpeechSeq2Seq.from_pretrained(str(model_path), device=device.value) ov_processor = AutoProcessor.from_pretrained(str(model_path)) - -.. parsed-literal:: - - Compiling the encoder to CPU ... - Compiling the decoder to CPU ... - Compiling the decoder to CPU ... - - Prepare calibration datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -797,14 +765,6 @@ negligible. from datasets import load_dataset from tqdm.notebook import tqdm - def extract_input_features(sample): - input_features = processor( - sample["audio"]["array"], - sampling_rate=sample["audio"]["sampling_rate"], - return_tensors="pt", - ).input_features - return input_features - CALIBRATION_DATASET_SIZE = 30 @@ -868,127 +828,6 @@ negligible. ov_quantized_pipe = quantize(ov_model, CALIBRATION_DATASET_SIZE) - - -.. parsed-literal:: - - Collecting calibration data: 0%| | 0/30 [00:00 - + Your browser does not support the audio element. - + .. parsed-literal:: - Original : Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. - Quantized: Mr Quilder is the apostle of the middle classes and we are glad to welcome his gospel. - + Original : Colonel Jessif, did you order the code rate? You don't have to answer that question. I'll answer the question. You want answers? I think I'm entitled. You want answers? I want the truth. You can't handle the truth. + Quantized: Don, I'll just, if you order the code right. You don have to answer that question. I'll answer the question. You want answers. 
I think I'm entitled you want answer. I want the truth. You can't handle the truth. You can't handle the truth. + Compare performance and accuracy of the original and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1096,10 +933,10 @@ for Word Error Rate. .. parsed-literal:: - Whole pipeline performance speedup: 1.381 + Whole pipeline performance speedup: 1.350 Whisper transcription word accuracy. Original model: 82.88%. Quantized model: 84.13%. Accuracy drop: -1.25%. - + Interactive demo ---------------- @@ -1115,35 +952,21 @@ upload button) or record using your microphone. import requests if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/distil-whisper-asr/gradio_helper.py") + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/whisper-asr-genai/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) from gradio_helper import make_demo, GradioPipeline pipe = ov_quantized_pipe if to_quantize.value else ov_pipe - gr_pipeline = GradioPipeline(pipe, multilingual=(not model_id.value.endswith(".en")), quantized=to_quantize.value) + gr_pipeline = GradioPipeline(pipe, model_id.value, quantized=to_quantize.value) demo = make_demo(gr_pipeline) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(share=True, debug=False) + demo.launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. 
- - - - - - - diff --git a/docs/notebooks/whisper-subtitles-generation-with-output.rst b/docs/notebooks/whisper-subtitles-generation-with-output.rst index ad479f160f3453..b1e16ffa40c626 100644 --- a/docs/notebooks/whisper-subtitles-generation-with-output.rst +++ b/docs/notebooks/whisper-subtitles-generation-with-output.rst @@ -18,8 +18,11 @@ blog `__, `model card `__ and GitHub `repository `__. -In this notebook, we will use Whisper with OpenVINO to generate -subtitles in a sample video. Additionally, we will use +In this notebook, we will use Whisper model with `OpenVINO Generate +API `__ for `Whisper +automatic speech recognition +scenarios `__ +to generate subtitles in a sample video. Additionally, we will use `NNCF `__ improving model performance by INT8 quantization. Notebook contains the following steps: 1. Download the model. 2. Instantiate the PyTorch model pipeline. 3. @@ -75,11 +78,29 @@ Install dependencies. .. code:: ipython3 - %pip install -q "openvino>=2024.1.0" "nncf>=2.10.0" - %pip install -q "python-ffmpeg<=1.0.16" moviepy "onnx!=1.16.2" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "yt_dlp>=2024.8.6" soundfile librosa jiwer + %pip install -q "nncf>=2.13.0" + %pip install -q --pre -U "openvino" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q "python-ffmpeg<=1.0.16" "ffmpeg" "moviepy" "transformers>=4.45" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "yt_dlp>=2024.8.6" soundfile librosa jiwer %pip install -q "gradio>=4.19" +.. 
code:: ipython3 + + import requests + from pathlib import Path + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + if not Path("cmd_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + Instantiate model ----------------- @@ -135,7 +156,7 @@ Whisper family. .. parsed-literal:: - Dropdown(description='Model:', index=6, options=('openai/whisper-large-v3', 'openai/whisper-large-v2', 'openai… + Dropdown(description='Model:', index=7, options=('openai/whisper-large-v3-turbo', 'openai/whisper-large-v3', '… @@ -144,49 +165,37 @@ Convert model to OpenVINO Intermediate Representation (IR) format using Optimum- -The Hugging Face Optimum API is a high-level API that enables us to -convert and quantize models from the Hugging Face Transformers library -to the OpenVINO™ IR format. For more details, refer to the `Hugging Face -Optimum -documentation `__. +Listed Whisper model are available for downloading via the `HuggingFace +hub `__. We will use optimum-cli +interface for exporting it into OpenVINO Intermediate Representation +(IR) format. -Optimum Intel can be used to load optimized models from the `Hugging -Face Hub `__ and -create pipelines to run an inference with OpenVINO Runtime using Hugging -Face APIs. The Optimum Inference models are API compatible with Hugging -Face Transformers models. This means we just need to replace the -``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` -class. +Optimum CLI interface for converting models supports export to OpenVINO +(supported starting optimum-intel 1.12 version). General command format: -Below is an example of the whisper-tiny model +.. code:: bash -.. 
code:: diff - - -from transformers import AutoModelForSpeechSeq2Seq - +from optimum.intel.openvino import OVModelForSpeechSeq2Seq - from transformers import AutoTokenizer, pipeline + optimum-cli export openvino --model --task - model_id = "openai/whisper-tiny" - -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) - +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) - -Model class initialization starts with calling the ``from_pretrained`` -method. When downloading and converting the Transformers model, the -parameter ``export=True`` should be added. We can save the converted -model for the next usage with the ``save_pretrained`` method. -Alternatively, model conversion can be performed using Optimum-CLI -interface. You can find more details about Optimum-Intel and Optimum CLI -usage in this `tutorial `__. -The command bellow illustrates how to convert whisper using optimum cli. +where ``--model`` argument is model id from HuggingFace Hub or local +directory with model (saved using ``.save_pretrained`` method), +``--task`` is one of `supported +task `__ +that exported model should solve. For Whisper it will be +``automatic-speech-recognition-with-past``. If model initialization +requires to use remote code, ``--trust-remote-code`` flag additionally +should be passed. Full list of supported arguments is available via +``--help``. For more details and examples of usage, please check `optimum +documentation `__. .. code:: ipython3 - from pathlib import Path + from cmd_helper import optimum_cli model_dir = model_id.value.split("/")[-1] if not Path(model_dir).exists(): - !optimum-cli export openvino -m {model_id.value} {model_dir} --weight-format fp16 + optimum_cli(model_id.value, model_dir) Prepare inference pipeline -------------------------- @@ -201,24 +210,12 @@ Whisper model. whisper_pipeline.png -Preprocessing and post-processing are important in this model use. 
-``transformers.AutoProcessor`` class used for initialization -``WhisperProcessor`` is responsible for preparing audio input data for -the PyTorch model, converting it to Mel-spectrogram and decoding -predicted output token_ids into string using tokenizer. Tokenizers and -Processors are distributed with models also compatible with the OpenVINO -model. - -Like the original PyTorch model, the OpenVINO model is also compatible -with HuggingFace -`pipeline `__ -interface for ``automatic-speech-recognition``. Pipeline can be used for -long audio transcription. Distil-Whisper uses a chunked algorithm to -transcribe long-form audio files. In practice, this chunked long-form -algorithm is 9x faster than the sequential algorithm proposed by OpenAI -in the Whisper paper. To enable chunking, pass the chunk_length_s -parameter to the pipeline. For Distil-Whisper, a chunk length of 15 -seconds is optimal. To activate batching, pass the argument batch_size. +To simplify user experience we will use `OpenVINO Generate +API `__. +Firstly we will create pipeline with ``WhisperPipeline``. You can +construct it straight away from the folder with the converted model. It +will automatically load the ``model``, ``tokenizer``, ``detokenizer`` +and default ``generation configuration``. Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -229,22 +226,9 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import openvino as ov - - core = ov.Core() - -.. code:: ipython3 - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - from notebook_utils import device_widget - device = device_widget() + device = device_widget(default="CPU", exclude=["NPU"]) device @@ -253,78 +237,46 @@ select device from dropdown list for running inference using OpenVINO .. 
parsed-literal:: - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') .. code:: ipython3 - from optimum.intel.openvino import OVModelForSpeechSeq2Seq - from transformers import AutoProcessor, pipeline - - ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir, device=device.value) - - processor = AutoProcessor.from_pretrained(model_dir) + import openvino_genai - pipe = pipeline( - "automatic-speech-recognition", - model=ov_model, - chunk_length_s=30, - tokenizer=processor.tokenizer, - feature_extractor=processor.feature_extractor, - ) + ov_pipe = openvino_genai.WhisperPipeline(str(model_dir), device=device.value) Run video transcription pipeline -------------------------------- -Now, we are ready to start transcription. We select a video from YouTube -that we want to transcribe. Be patient, as downloading the video may -take some time. +Now, we are ready to start transcription. Let’s load the video first. .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import download_file - VIDEO_LINK = "https://youtu.be/kgL5LBM-hFI" - link = widgets.Text( - value=VIDEO_LINK, - placeholder="Type link for video", - description="Video:", - disabled=False, - ) + output_file = Path("downloaded_video.mp4") - link - - + download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Sheldon%20Cooper%20Jim%20Parsons%20at%20Intels%20Lab.mp4", + filename=output_file.name, + ) .. parsed-literal:: - Text(value='https://youtu.be/kgL5LBM-hFI', description='Video:', placeholder='Type link for video') + 'downloaded_video.mp4' already exists. -.. 
code:: ipython3 - - from pathlib import Path - import yt_dlp - - print(f"Downloading video {link.value} started") - - output_file = Path("downloaded_video.mp4") - ydl_ops = {"format": "best[ext=mp4]", "outtmpl": output_file.as_posix()} - with yt_dlp.YoutubeDL(ydl_ops) as ydl: - ydl.download(link.value) - - print(f"Video saved to {output_file}") - .. parsed-literal:: - Downloading video https://youtu.be/kgL5LBM-hFI started - Video saved to downloaded_video.mp4 + PosixPath('/home/labuser/work/notebook/openvino_notebooks/notebooks/whisper-subtitles-generation/downloaded_video.mp4') + Select the task for the model: @@ -377,17 +329,31 @@ Select the task for the model: input_video.audio.write_audiofile(audio_file, verbose=False, logger=None) with open(audio_file, "rb") as f: inputs = f.read() - audio = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate) + audio = ffmpeg_read(inputs, 16000) return { "raw": audio, - "sampling_rate": pipe.feature_extractor.sampling_rate, + "sampling_rate": 16000, }, duration +Let’s run generation method. We will put input data as ``np array``. +Also we will specify ``task`` and ``return_timestamps=True`` options. If +task is ``translate``, you can place ``language`` option, for example +``<|fr|>`` for French or it will be detected automatically. We can set up +generation parameters in different ways. We can get default config with +``get_generation_config()``, setup parameters and put config directly to +``generate()``. It’s also possible to specify the needed options just as +inputs in the ``generate()`` method and we will use this way. Then we +just run ``generate`` method and get the output in text format. + +``generate`` method with ``return_timestamps`` set to ``True`` will +return ``chunks``, which contain attributes: ``text``, ``start_ts`` and +``end_ts`` + .. 
code:: ipython3 inputs, duration = get_audio(output_file) - transcription = pipe(inputs, generate_kwargs={"task": task.value}, return_timestamps=True)["chunks"] + transcription = ov_pipe.generate(inputs["raw"], task=task.value, return_timestamps=True).chunks .. code:: ipython3 @@ -419,18 +385,19 @@ Select the task for the model: """ segment_lines = [] for idx, segment in enumerate(transcription): + timestamp = (segment.start_ts, segment.end_ts) # for the case where the model could not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. - if segment["timestamp"][1] is None: - segment["timestamp"] = (segment["timestamp"][0], filter_duration) + if segment.end_ts == -1: + timestamp[1] = filter_duration - if filter_duration is not None and (segment["timestamp"][0] >= math.floor(filter_duration) or segment["timestamp"][1] > math.ceil(filter_duration) + 1): + if filter_duration is not None and (timestamp[0] >= math.floor(filter_duration) or timestamp[1] > math.ceil(filter_duration) + 1): break segment_lines.append(str(idx + 1) + "\n") - time_start = format_timestamp(segment["timestamp"][0]) - time_end = format_timestamp(segment["timestamp"][1]) + time_start = format_timestamp(timestamp[0]) + time_end = format_timestamp(timestamp[1]) time_str = f"{time_start} --> {time_end}\n" segment_lines.append(time_str) - segment_lines.append(segment["text"] + "\n\n") + segment_lines.append(segment.text + "\n\n") return segment_lines "The results will be saved in the ``downloaded_video.srt`` file. SRT is @@ -457,7 +424,7 @@ Now let us see the results. .. parsed-literal:: - Video(value=b"\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00:'moov\x00\x00\x00lmvhd...", height='800… + Video(value=b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00Aimoov\x00\x00\x00lmvhd...', height='800… @@ -565,6 +532,42 @@ Please select below whether you would like to run Whisper quantization. 
%load_ext skip_kernel_extension +Let’s load converted OpenVINO model format using Optimum-Intel to easily +quantize it. + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ or +local folder to create pipelines to run an inference with OpenVINO +Runtime using Hugging Face APIs. The Optimum Inference models are API +compatible with Hugging Face Transformers models. This means we just +need to replace the ``AutoModelForXxx`` class with the corresponding +``OVModelForXxx`` class. + +Below is an example of the whisper-tiny model + +.. code:: diff + + -from transformers import AutoModelForSpeechSeq2Seq + +from optimum.intel.openvino import OVModelForSpeechSeq2Seq + from transformers import AutoTokenizer, pipeline + + model_id = "openai/whisper-tiny" + -model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) + +model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True) + +Like the original PyTorch model, the OpenVINO model is also compatible +with HuggingFace +`pipeline `__ +interface for ``automatic-speech-recognition``. + +.. code:: ipython3 + + from transformers import AutoProcessor + from optimum.intel.openvino import OVModelForSpeechSeq2Seq + + ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir, device=device.value) + processor = AutoProcessor.from_pretrained(model_dir) + Prepare calibration datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -583,6 +586,9 @@ improves quantization quality. %%skip not $to_quantize.value from itertools import islice + from tqdm.notebook import tqdm + from datasets import load_dataset + from transformers import pipeline from optimum.intel.openvino.quantization import InferRequestWrapper @@ -629,28 +635,16 @@ negligible. 
import gc import shutil import nncf - from datasets import load_dataset - from tqdm.notebook import tqdm - - def extract_input_features(sample): - input_features = processor( - sample["audio"]["array"], - sampling_rate=sample["audio"]["sampling_rate"], - return_tensors="pt", - ).input_features - return input_features - + import openvino as ov - CALIBRATION_DATASET_SIZE = 50 + CALIBRATION_DATASET_SIZE = 30 quantized_model_path = Path(f"{model_dir}_quantized") def quantize(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int): if not quantized_model_path.exists(): - encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset( - ov_model, calibration_dataset_size - ) + encoder_calibration_data, decoder_calibration_data = collect_calibration_dataset(ov_model, calibration_dataset_size) print("Quantizing encoder") quantized_encoder = nncf.quantize( ov_model.encoder.model, @@ -658,7 +652,7 @@ negligible. subset_size=len(encoder_calibration_data), model_type=nncf.ModelType.TRANSFORMER, # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.50) + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.80), ) ov.save_model(quantized_encoder, quantized_model_path / "openvino_encoder_model.xml") del quantized_encoder @@ -672,7 +666,7 @@ negligible. subset_size=len(decoder_calibration_data), model_type=nncf.ModelType.TRANSFORMER, # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96) + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96), ) ov.save_model(quantized_decoder_with_past, quantized_model_path / "openvino_decoder_with_past_model.xml") del quantized_decoder_with_past @@ -685,218 +679,24 @@ negligible. 
shutil.copy(model_path / "generation_config.json", quantized_model_path / "generation_config.json") shutil.copy(model_path / "openvino_decoder_model.xml", quantized_model_path / "openvino_decoder_model.xml") shutil.copy(model_path / "openvino_decoder_model.bin", quantized_model_path / "openvino_decoder_model.bin") + shutil.copy(model_path / "openvino_tokenizer.xml", quantized_model_path / "openvino_tokenizer.xml") + shutil.copy(model_path / "openvino_tokenizer.bin", quantized_model_path / "openvino_tokenizer.bin") + shutil.copy(model_path / "openvino_detokenizer.xml", quantized_model_path / "openvino_detokenizer.xml") + shutil.copy(model_path / "openvino_detokenizer.bin", quantized_model_path / "openvino_detokenizer.bin") + shutil.copy(model_path / "tokenizer_config.json", quantized_model_path / "tokenizer_config.json") + shutil.copy(model_path / "tokenizer.json", quantized_model_path / "tokenizer.json") + shutil.copy(model_path / "vocab.json", quantized_model_path / "vocab.json") + shutil.copy(model_path / "preprocessor_config.json", quantized_model_path / "preprocessor_config.json") + shutil.copy(model_path / "special_tokens_map.json", quantized_model_path / "special_tokens_map.json") + shutil.copy(model_path / "normalizer.json", quantized_model_path / "normalizer.json") + shutil.copy(model_path / "merges.txt", quantized_model_path / "merges.txt") + shutil.copy(model_path / "added_tokens.json", quantized_model_path / "added_tokens.json") - quantized_ov_model = OVModelForSpeechSeq2Seq.from_pretrained(quantized_model_path, compile=False) - quantized_ov_model.to(device.value) - quantized_ov_model.compile() - return quantized_ov_model + quantized_ov_pipe = openvino_genai.WhisperPipeline(str(quantized_model_path), device=device.value) + return quantized_ov_pipe - ov_quantized_model = quantize(ov_model, CALIBRATION_DATASET_SIZE) - - - -.. parsed-literal:: - - Collecting calibration data: 0%| | 0/50 [00:00 00:00:05,000 - What's that? 
- - 2 - 00:00:05,000 --> 00:00:07,000 - Oh, wow. - - 3 - 00:00:09,000 --> 00:00:11,000 - Hello humans. - - 4 - 00:00:14,000 --> 00:00:15,000 - Focus on me. - - 5 - 00:00:15,000 --> 00:00:16,000 - Focus on the guard. - - 6 - 00:00:18,000 --> 00:00:20,000 - Don't tell anyone what you're seen in here. - - 7 - 00:00:22,000 --> 00:00:24,000 - Have you seen what's in there? - - 8 - 00:00:24,000 --> 00:00:25,000 - They have intel. - - 9 - 00:00:25,000 --> 00:00:27,000 - This is where it all changes. - - - - Compare performance and accuracy of the original and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -975,9 +726,6 @@ and performance stand-points. To measure accuracy, we use ``1 - WER`` as a metric, where WER stands for Word Error Rate. -When measuring inference time, we do it separately for encoder and -decoder-with-past model forwards, and for the whole model inference too. - .. code:: ipython3 %%skip not $to_quantize.value @@ -986,69 +734,34 @@ decoder-with-past model forwards, and for the whole model inference too. 
from contextlib import contextmanager from jiwer import wer, wer_standardize - TEST_DATASET_SIZE = 50 - MEASURE_TIME = False - - @contextmanager - def time_measurement(): - global MEASURE_TIME - try: - MEASURE_TIME = True - yield - finally: - MEASURE_TIME = False - - def time_fn(obj, fn_name, time_list): - original_fn = getattr(obj, fn_name) - - def wrapper(*args, **kwargs): - if not MEASURE_TIME: - return original_fn(\*args, \*\*kwargs) - start_time = time.perf_counter() - result = original_fn(\*args, \*\*kwargs) - end_time = time.perf_counter() - time_list.append(end_time - start_time) - return result - - setattr(obj, fn_name, wrapper) def calculate_transcription_time_and_accuracy(ov_model, test_samples): - encoder_infer_times = [] - decoder_with_past_infer_times = [] whole_infer_times = [] - time_fn(ov_model, "generate", whole_infer_times) - time_fn(ov_model.encoder, "forward", encoder_infer_times) - time_fn(ov_model.decoder_with_past, "forward", decoder_with_past_infer_times) ground_truths = [] predictions = [] for data_item in tqdm(test_samples, desc="Measuring performance and accuracy"): - input_features = extract_input_features(data_item) - - with time_measurement(): - predicted_ids = ov_model.generate(input_features) - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) + start_time = time.perf_counter() + transcription = ov_model.generate(data_item["audio"]["array"], return_timestamps=True) + end_time = time.perf_counter() + whole_infer_times.append(end_time - start_time) ground_truths.append(data_item["text"]) - predictions.append(transcription[0]) + predictions.append(transcription.texts[0]) word_accuracy = (1 - wer(ground_truths, predictions, reference_transform=wer_standardize, hypothesis_transform=wer_standardize)) * 100 mean_whole_infer_time = sum(whole_infer_times) - mean_encoder_infer_time = sum(encoder_infer_times) - mean_decoder_with_time_infer_time = sum(decoder_with_past_infer_times) - return word_accuracy, 
(mean_whole_infer_time, mean_encoder_infer_time, mean_decoder_with_time_infer_time) + return word_accuracy, mean_whole_infer_time test_dataset = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True, trust_remote_code=True) test_dataset = test_dataset.shuffle(seed=42).take(TEST_DATASET_SIZE) test_samples = [sample for sample in test_dataset] - accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_model, test_samples) - accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(ov_quantized_model, test_samples) - print(f"Encoder performance speedup: {times_original[1] / times_quantized[1]:.3f}") - print(f"Decoder with past performance speedup: {times_original[2] / times_quantized[2]:.3f}") - print(f"Whole pipeline performance speedup: {times_original[0] / times_quantized[0]:.3f}") + accuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_pipe, test_samples) + accuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(quantized_ov_pipe, test_samples) + print(f"Whole pipeline performance speedup: {times_original / times_quantized:.3f}") print(f"Whisper transcription word accuracy. Original model: {accuracy_original:.2f}%. Quantized model: {accuracy_quantized:.2f}%.") print(f"Accuracy drop: {accuracy_original - accuracy_quantized:.2f}%.") @@ -1067,11 +780,9 @@ decoder-with-past model forwards, and for the whole model inference too. .. parsed-literal:: - Encoder performance speedup: 1.352 - Decoder with past performance speedup: 1.342 - Whole pipeline performance speedup: 1.350 - Whisper transcription word accuracy. Original model: 81.67%. Quantized model: 83.67%. - Accuracy drop: -1.99%. + Whole pipeline performance speedup: 1.452 + Whisper transcription word accuracy. Original model: 81.77%. Quantized model: 82.97%. + Accuracy drop: -1.20%. Interactive demo @@ -1081,18 +792,26 @@ Interactive demo .. 
code:: ipython3 - def transcribe(url, task, use_int8): - output_file = Path("downloaded_video.mp4") - ydl_ops = {"format": "best[ext=mp4]", "outtmpl": output_file.as_posix()} - with yt_dlp.YoutubeDL(ydl_ops) as ydl: - ydl.download(link.value) - inputs, duration = get_audio(output_file) - m_pipe = int8_pipe if use_int8 else pipe - transcription = m_pipe(inputs, generate_kwargs={"task": task.lower()}, return_timestamps=True)["chunks"] + def_config = ov_pipe.get_generation_config() + + + def transcribe(video_path, task, use_int8): + data_path = Path(video_path) + inputs, duration = get_audio(data_path) + m_pipe = quantized_ov_pipe if use_int8 else ov_pipe + + frame_num = len(inputs["raw"]) / 16000 + if frame_num > 30: + config = ov_pipe.get_generation_config() + chink_num = math.ceil(frame_num / 30) + config.max_length = chink_num * def_config.max_length + m_pipe.set_generation_config(config) + + transcription = m_pipe.generate(inputs["raw"], task=task.lower(), return_timestamps=True).chunks srt_lines = prepare_srt(transcription, duration) - with output_file.with_suffix(".srt").open("w") as f: + with data_path.with_suffix(".srt").open("w") as f: f.writelines(srt_lines) - return [str(output_file), str(output_file.with_suffix(".srt"))] + return [str(data_path), str(data_path.with_suffix(".srt"))] if not Path("gradio_helper.py").exists(): @@ -1101,7 +820,7 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(fn=transcribe, quantized=ov_quantized_model is not None) + demo = make_demo(fn=transcribe, quantized=ov_quantized_model is not None, sample_path=output_file) try: demo.launch(debug=False) diff --git a/docs/notebooks/wuerstchen-image-generation-with-output.rst b/docs/notebooks/wuerstchen-image-generation-with-output.rst index 8e61a42f359dad..237bfb8b4bd943 100644 --- a/docs/notebooks/wuerstchen-image-generation-with-output.rst +++ b/docs/notebooks/wuerstchen-image-generation-with-output.rst @@ -229,8 +229,10 @@ parameter to generate a less 
memory-demanding model. Text encoder model has 2 inputs: -- ``input_ids``: vector of tokenized input sentence. Default tokenizer vector length is 77. -- ``attention_mask``: vector of same length as ``input_ids`` describing the attention mask. +- ``input_ids``: vector of tokenized + input sentence. Default tokenizer vector length is 77. +- ``attention_mask``: vector of same length as ``input_ids`` describing + the attention mask. .. code:: ipython3 diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output.rst b/docs/notebooks/yolov11-instance-segmentation-with-output.rst index 931465dba1e79b..9cbca9fe4f1c37 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output.rst +++ b/docs/notebooks/yolov11-instance-segmentation-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -216,14 +216,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.3MB/s] + 100%|██████████| 5.90M/5.90M [00:00<00:00, 25.2MB/s] .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 66.0ms - Speed: 2.1ms preprocess, 66.0ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 3 bicycles, 2 cars, 1 motorcycle, 1 dog, 69.6ms + Speed: 2.0ms preprocess, 69.6ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640) @@ -252,15 +252,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (5.9 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-seg_openvino_model/' (6.0 MB) Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=segment model=yolo11n-seg_openvino_model imgsz=640 half Validate: yolo val task=segment model=yolo11n-seg_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -331,7 +331,7 @@ Test on single image .. 
parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-seg_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... @@ -345,8 +345,8 @@ Test on single image .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.1ms - Speed: 2.1ms preprocess, 23.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 3 bicycles, 2 cars, 1 dog, 23.2ms + Speed: 1.8ms preprocess, 23.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) @@ -668,8 +668,8 @@ on the image. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 10.9ms - Speed: 2.0ms preprocess, 10.9ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 11.2ms + Speed: 1.9ms preprocess, 11.2ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640) @@ -717,18 +717,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 
2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.71 ms + [ INFO ] Read model took 19.61 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -738,7 +738,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 5.12 ms + [ INFO ] Reshape model took 8.69 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -746,7 +746,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 385.36 ms + [ INFO ] Compile model took 390.55 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -783,17 +783,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 37.32 ms + [ INFO ] First inference took 36.99 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 1794 iterations - [ INFO ] Duration: 15062.45 ms + [ INFO ] Count: 1806 iterations + [ INFO ] Duration: 15074.73 ms [ INFO ] Latency: - [ INFO ] Median: 50.09 ms - [ INFO ] Average: 50.21 ms - [ INFO ] Min: 43.19 ms - [ INFO ] Max: 65.81 ms - [ INFO ] Throughput: 119.10 FPS + [ INFO ] Median: 49.77 ms + [ INFO ] Average: 49.90 ms + [ INFO ] Min: 24.69 ms + [ INFO ] Max: 66.18 ms + [ INFO ] Throughput: 119.80 FPS .. code:: ipython3 @@ -808,11 +808,11 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration @@ -837,7 +837,7 @@ models. [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_8) : f32 / [...] / [1,116,8400] [ INFO ] input.255 (node: __module.model.23.cv4.2.1.act/aten::silu_/Swish_46) : f32 / [...] / [1,32,160,160] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 608.94 ms + [ INFO ] Compile model took 592.55 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -874,17 +874,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 26.24 ms + [ INFO ] First inference took 24.82 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 3636 iterations - [ INFO ] Duration: 15046.89 ms + [ INFO ] Count: 3702 iterations + [ INFO ] Duration: 15040.97 ms [ INFO ] Latency: - [ INFO ] Median: 24.53 ms - [ INFO ] Average: 24.70 ms - [ INFO ] Min: 12.80 ms - [ INFO ] Max: 40.79 ms - [ INFO ] Throughput: 241.64 FPS + [ INFO ] Median: 24.15 ms + [ INFO ] Average: 24.25 ms + [ INFO ] Min: 20.10 ms + [ INFO ] Max: 43.05 ms + [ INFO ] Throughput: 246.13 FPS Other ways to optimize model diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png index 32694d34889741..737d0af981d026 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png +++ b/docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_46_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ceb4f1951cf92d0a2365750af32b945a90f65102311933328225f2dbd7b802c1 -size 496675 +oid sha256:f7d6739facd4204cf94a8bb5d471ea56a2ce3eab4dcd1e3807ba40c8e43baa44 +size 502582 diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output.rst b/docs/notebooks/yolov11-keypoint-detection-with-output.rst index cc830279de6753..cf94af9ab1e794 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output.rst +++ b/docs/notebooks/yolov11-keypoint-detection-with-output.rst @@ -143,7 +143,7 @@ Import required utility functions. The lower cell will download the .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg') @@ -214,14 +214,14 @@ Let us consider the examples: .. parsed-literal:: - 100%|██████████| 5.97M/5.97M [00:00<00:00, 25.1MB/s] + 100%|██████████| 5.97M/5.97M [00:00<00:00, 21.1MB/s] .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 56.4ms - Speed: 1.9ms preprocess, 56.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 480x640 1 person, 62.6ms + Speed: 1.8ms preprocess, 62.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640) @@ -250,15 +250,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n-pose.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 56, 8400) (6.0 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... - OpenVINO: export success ✅ 1.9s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... 
+ OpenVINO: export success ✅ 2.0s, saved as 'yolo11n-pose_openvino_model/' (6.0 MB) - Export complete (2.2s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Export complete (2.1s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=pose model=yolo11n-pose_openvino_model imgsz=640 half Validate: yolo val task=pose model=yolo11n-pose_openvino_model imgsz=640 data=/ultralytics/ultralytics/cfg/datasets/coco-pose.yaml half Visualize: https://netron.app @@ -334,12 +334,12 @@ ready to check model prediction. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-pose_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 21.5ms - Speed: 2.5ms preprocess, 21.5ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 20.1ms + Speed: 2.8ms preprocess, 20.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -682,12 +682,12 @@ on the image. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n-pose_openvino_model for OpenVINO inference... 
Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 31.0ms - Speed: 2.1ms preprocess, 31.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/intel_rnb.jpg: 640x640 1 person, 31.4ms + Speed: 2.1ms preprocess, 31.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -737,18 +737,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.80 ms + [ INFO ] Read model took 19.20 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -757,14 +757,14 @@ models. 
[Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 5.17 ms + [ INFO ] Reshape model took 8.54 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 338.05 ms + [ INFO ] Compile model took 338.50 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -801,17 +801,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 33.00 ms + [ INFO ] First inference took 33.80 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 17292 iterations - [ INFO ] Duration: 120045.86 ms + [ INFO ] Count: 17082 iterations + [ INFO ] Duration: 120040.01 ms [ INFO ] Latency: - [ INFO ] Median: 40.71 ms - [ INFO ] Average: 41.52 ms - [ INFO ] Min: 29.33 ms - [ INFO ] Max: 102.50 ms - [ INFO ] Throughput: 144.04 FPS + [ INFO ] Median: 40.53 ms + [ INFO ] Average: 42.03 ms + [ INFO ] Min: 30.12 ms + [ INFO ] Max: 171.39 ms + [ INFO ] Throughput: 142.30 FPS .. code:: ipython3 @@ -827,18 +827,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 
2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.03 ms + [ INFO ] Read model took 28.43 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -854,7 +854,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_9) : f32 / [...] / [1,56,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 558.98 ms + [ INFO ] Compile model took 573.24 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -891,17 +891,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 31.29 ms + [ INFO ] First inference took 25.90 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5184 iterations - [ INFO ] Duration: 15060.72 ms + [ INFO ] Count: 5124 iterations + [ INFO ] Duration: 15036.37 ms [ INFO ] Latency: - [ INFO ] Median: 34.51 ms - [ INFO ] Average: 34.67 ms - [ INFO ] Min: 21.44 ms - [ INFO ] Max: 51.10 ms - [ INFO ] Throughput: 344.21 FPS + [ INFO ] Median: 34.72 ms + [ INFO ] Average: 35.02 ms + [ INFO ] Min: 25.74 ms + [ INFO ] Max: 52.00 ms + [ INFO ] Throughput: 340.77 FPS Compare accuracy of the Original and Quantized Models diff --git a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png index a03840fa0f4bec..24dd29e6b7751f 100644 --- a/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-keypoint-detection-with-output_files/yolov11-keypoint-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6b92af78802b6615cc954b803042caa64f3969c1947652978beaaf6481a5104 -size 507485 +oid sha256:498642bae277603810b461dcd81b4e67eb3cce1e0f1099f346b8d832dde4720f +size 504317 diff --git a/docs/notebooks/yolov11-object-detection-with-output.rst b/docs/notebooks/yolov11-object-detection-with-output.rst index 40256995ebd00f..b446d6d3c1d41e 100644 --- a/docs/notebooks/yolov11-object-detection-with-output.rst +++ b/docs/notebooks/yolov11-object-detection-with-output.rst @@ -141,7 +141,7 @@ Import required utility functions. The lower cell will download the .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg') @@ -213,8 +213,8 @@ Let us consider the examples: .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 79.4ms - Speed: 2.5ms preprocess, 79.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 80.3ms + Speed: 2.2ms preprocess, 80.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640) @@ -243,15 +243,15 @@ preserve dynamic shapes in the model. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB) - OpenVINO: starting export with openvino 2024.5.0-16913-890f2e12c98... + OpenVINO: starting export with openvino 2024.5.0-16993-9c432a3641a... 
OpenVINO: export success ✅ 1.8s, saved as 'yolo11n_openvino_model/' (5.4 MB) Export complete (2.0s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization Predict: yolo predict task=detect model=yolo11n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolo11n_openvino_model imgsz=640 data=/usr/src/ultralytics/ultralytics/cfg/datasets/coco.yaml half Visualize: https://netron.app @@ -326,12 +326,12 @@ ready to check model prediction for object detection. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.3.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolo11n_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 20.7ms - Speed: 2.8ms preprocess, 20.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 19.9ms + Speed: 1.8ms preprocess, 19.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640) @@ -645,8 +645,8 @@ on the image. .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 17.5ms - Speed: 1.8ms preprocess, 17.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov11-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 18.1ms + Speed: 1.8ms preprocess, 18.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) @@ -697,18 +697,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.57 ms + [ INFO ] Read model took 18.52 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,3,?,?] @@ -717,14 +717,14 @@ models. 
[Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'x': [1,3,640,640] - [ INFO ] Reshape model took 4.68 ms + [ INFO ] Reshape model took 8.03 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 319.17 ms + [ INFO ] Compile model took 317.51 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -761,17 +761,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 31.01 ms + [ INFO ] First inference took 30.05 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 18528 iterations - [ INFO ] Duration: 120050.83 ms + [ INFO ] Count: 18396 iterations + [ INFO ] Duration: 120062.14 ms [ INFO ] Latency: - [ INFO ] Median: 37.96 ms - [ INFO ] Average: 38.74 ms - [ INFO ] Min: 19.79 ms - [ INFO ] Max: 98.97 ms - [ INFO ] Throughput: 154.33 FPS + [ INFO ] Median: 38.20 ms + [ INFO ] Average: 39.02 ms + [ INFO ] Min: 22.85 ms + [ INFO ] Max: 99.30 ms + [ INFO ] Throughput: 153.22 FPS .. code:: ipython3 @@ -787,18 +787,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 
2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.43 ms + [ INFO ] Read model took 26.57 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,3,640,640] @@ -814,7 +814,7 @@ models. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.model.23/aten::cat/Concat_7) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 515.22 ms + [ INFO ] Compile model took 533.02 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -851,17 +851,17 @@ models. [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 19.14 ms + [ INFO ] First inference took 29.56 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5184 iterations - [ INFO ] Duration: 15055.81 ms + [ INFO ] Count: 5148 iterations + [ INFO ] Duration: 15046.93 ms [ INFO ] Latency: - [ INFO ] Median: 34.47 ms - [ INFO ] Average: 34.65 ms - [ INFO ] Min: 18.19 ms - [ INFO ] Max: 51.47 ms - [ INFO ] Throughput: 344.32 FPS + [ INFO ] Median: 34.51 ms + [ INFO ] Average: 34.87 ms + [ INFO ] Min: 24.28 ms + [ INFO ] Max: 69.01 ms + [ INFO ] Throughput: 342.13 FPS Next steps diff --git a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png index f6ad963c53f95c..d5c2464840ad65 100644 --- a/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png +++ b/docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_43_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ed070bb0687ea1e958c91f59b98f38b9834911f23f78c1a8aad55bed3c7cf6d -size 569483 +oid sha256:4e54004e6e28473d61aacba64acb407291de3581ae9a0b792a3d8f96bd394f08 +size 572509 diff --git a/docs/notebooks/yolov8-obb-with-output.rst b/docs/notebooks/yolov8-obb-with-output.rst index 87e73f76558ffa..3d9ef3d88c0751 100644 --- a/docs/notebooks/yolov8-obb-with-output.rst +++ b/docs/notebooks/yolov8-obb-with-output.rst @@ -173,6 +173,7 @@ instance. + Run inference ~~~~~~~~~~~~~ diff --git a/docs/notebooks/yolov8-object-detection-with-output.rst b/docs/notebooks/yolov8-object-detection-with-output.rst index 6b7dacd839a587..16648e4559f618 100644 --- a/docs/notebooks/yolov8-object-detection-with-output.rst +++ b/docs/notebooks/yolov8-object-detection-with-output.rst @@ -121,7 +121,7 @@ Install necessary packages. .. 
code:: ipython3 %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" - %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.3.0" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.2.24" onnx tqdm opencv-python --extra-index-url https://download.pytorch.org/whl/cpu Import required utility functions. The lower cell will download the ``notebook_utils`` Python module from GitHub. @@ -161,7 +161,7 @@ Import required utility functions. The lower cell will download the .. parsed-literal:: - PosixPath('/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg') + PosixPath('/home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg') @@ -194,10 +194,9 @@ Let us consider the examples: from PIL import Image from ultralytics import YOLO - DET_MODEL_NAME = "yolo11n" + DET_MODEL_NAME = "yolov8n" - det_model = YOLO(f"{DET_MODEL_NAME}.pt") - det_model.to("cpu") + det_model = YOLO(models_dir / f"{DET_MODEL_NAME}.pt") label_map = det_model.model.names res = det_model(IMAGE_PATH) @@ -206,14 +205,24 @@ Let us consider the examples: .. parsed-literal:: + Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'models/yolov8n.pt'... - image 1/1 /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 2 dogs, 101.6ms - Speed: 3.1ms preprocess, 101.6ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640) +.. parsed-literal:: + + 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 6.25M/6.25M [00:02<00:00, 2.29MB/s] + + +.. 
parsed-literal:: + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 1 dog, 82.9ms + Speed: 2.4ms preprocess, 82.9ms inference, 475.6ms postprocess per image at shape (1, 3, 480, 640) -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png + + + +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png @@ -230,10 +239,27 @@ preserve dynamic shapes in the model. .. code:: ipython3 # object detection model - det_model_path = Path(f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml") + det_model_path = models_dir / f"{DET_MODEL_NAME}_openvino_model/{DET_MODEL_NAME}.xml" if not det_model_path.exists(): det_model.export(format="openvino", dynamic=True, half=True) + +.. parsed-literal:: + + Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.1.0+cu121 CPU (Intel Core(TM) i9-10980XE 3.00GHz) + + PyTorch: starting from 'models/yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) + + OpenVINO: starting export with openvino 2024.3.0-16041-1e3b88e4e3f-releases/2024/3... + OpenVINO: export success ✅ 1.7s, saved as 'models/yolov8n_openvino_model/' (6.4 MB) + + Export complete (3.1s) + Results saved to /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/models + Predict: yolo predict task=detect model=models/yolov8n_openvino_model imgsz=640 half + Validate: yolo val task=detect model=models/yolov8n_openvino_model imgsz=640 data=coco.yaml half + Visualize: https://netron.app + + Verify model inference ~~~~~~~~~~~~~~~~~~~~~~ @@ -297,7 +323,6 @@ ready to check model prediction for object detection. det_model.predictor.inference = infer det_model.predictor.model.pt = False - det_model res = det_model(IMAGE_PATH) Image.fromarray(res[0].plot()[:, :, ::-1]) @@ -306,8 +331,8 @@ ready to check model prediction for object detection. .. 
parsed-literal:: - image 1/1 /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 1 bicycle, 2 cars, 1 dog, 16.9ms - Speed: 3.7ms preprocess, 16.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /home/akash/intel/openvino_notebooks/notebooks/yolov8-optimization/data/coco_bike.jpg: 640x640 2 bicycles, 2 cars, 1 dog, 16.1ms + Speed: 3.4ms preprocess, 16.1ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640) @@ -368,16 +393,245 @@ evaluation function. zip_ref.extractall(OUT_DIR / "coco/images") + +.. parsed-literal:: + + /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/val2017.zip: 0%| … + + +.. parsed-literal:: + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. 
+ To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. 
+ The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. 
+ + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. 
+ To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + IOPub message rate exceeded. + The Jupyter server will temporarily stop sending output + to the client in order to avoid crashing it. + To change this limit, set the config variable + `--ServerApp.iopub_msg_rate_limit`. + + Current values: + ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec) + ServerApp.rate_limit_window=3.0 (secs) + + + .. parsed-literal:: - '/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/val2017.zip' already exists. - '/home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco2017labels-segments.zip' already exists. + '/home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco2017labels-segments.zip' already exists. .. parsed-literal:: - /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco.yaml… + /home/akash/intel/NNCF/nncf/examples/post_training_quantization/openvino/yolov8/datasets/coco.yaml: 0%| … Define validation function @@ -520,7 +774,7 @@ validator class instance. .. parsed-literal:: - val: Scanning /home/ea/work/openvino_notebooks_new_clone/openvino_notebooks/notebooks/yolov8-optimization/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00\ **Note**: Model evaluation is time consuming +process and can take several minutes, depending on the hardware. For +reducing calculation time, we define ``num_samples`` parameter with +evaluation subset size, but in this case, accuracy can be noncomparable +with originally reported by the authors of the model, due to validation +subset difference. *To validate the models on the full dataset set +``NUM_TEST_SAMPLES = None``.* + +.. 
code:: ipython3 + + NUM_TEST_SAMPLES = 300 + +.. code:: ipython3 + + fp_det_stats = test(det_ov_model, core, det_data_loader, det_validator, num_samples=NUM_TEST_SAMPLES) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00 ([1,640,640,3], [N,H,W,C], f32) convert layout [N,C,H,W]: ([1,640,640,3], [N,H,W,C], f32) -> ([1,3,640,640], [N,C,H,W], f32) @@ -1348,7 +1704,7 @@ Now, we can skip these preprocessing steps in detect function: -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png @@ -1527,7 +1883,7 @@ Run the object detection: -.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png +.. image:: yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png .. parsed-literal:: diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg deleted file mode 100644 index 7a742941b54c17..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:394895a09cc413a6343edb2a6f6242258701b6911c09d8405eacc7c27f6fac62 -size 110819 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png index 6d4bfe012b6187..703c1375a98784 100644 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:7b18cc2e60a802c059cd5cc05598a06c4affd959216e03674eeb68e822295843 -size 913173 +oid sha256:e85f0434876997adc7cf2152fa667b4e271817a443a75ca350d284aee48f9145 +size 908034 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg deleted file mode 100644 index 40ac3a8da0dc2a..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90d9079b877726f30f154885745b36d8bc281f4970ce10f86529b9739a56ac72 -size 112567 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png deleted file mode 100644 index e7c4fb31035d98..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_38_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fad172495a797bc753d19d54bd4afaf8f08ed36dfd1b9761df7bfa948673aab -size 911538 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png new file mode 100644 index 00000000000000..44e7623ec662d1 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6778bdd31a39722d8ce3fb9ddea6383e451ab1b32cf23fad23b556bd89c99707 +size 907722 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg 
b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg deleted file mode 100644 index 202af866d221dd..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02d585dcd341e2580baf166b6f35337d2da17561a251fb5475041e2a20d6e558 -size 110078 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png deleted file mode 100644 index d0d2028049c4f3..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_61_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44c1e80aa859828b9aae3240fb544b483df8dbd8b7c7bca29479a2a0714612c2 -size 929124 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png deleted file mode 100644 index cdae11c722ca65..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_67_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c6c2609efd786cba1d9baac015d06040939ff8d1e7644f67d10d6df5c171a62 -size 569438 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png new file mode 100644 index 00000000000000..3b025ea9323f25 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b44f35cf238dcd208fcebe73bb388947f5528a167b511c8098d040b05d9bb819 +size 931491 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png new file mode 100644 index 00000000000000..6d628536486d93 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_76_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0d406144e3b07ae6bef629cd7b4973424847958b9865e7133bd0f0c7deeb5c +size 534154 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg deleted file mode 100644 index bcf00f3d50dcb5..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc502c95e9f5526befe662fcc79a6a91d4f5ff0cfce09165436e91f4ef8224ad -size 113613 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png deleted file mode 100644 index e886ccd29b47f9..00000000000000 --- a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c7bb27d4264dbc22e0006e42fb2f1619992aeac7220232119578e42a0d8a083 -size 904352 diff --git a/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png new file 
mode 100644 index 00000000000000..5073fa9f3eca09 --- /dev/null +++ b/docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d38bd078c6eeda706640d0e35907a9a504bf78f9dd8dbf2961de865c294802 +size 907783 diff --git a/docs/notebooks/yolov9-optimization-with-output.rst b/docs/notebooks/yolov9-optimization-with-output.rst index 0cf84003171753..b132215f390216 100644 --- a/docs/notebooks/yolov9-optimization-with-output.rst +++ b/docs/notebooks/yolov9-optimization-with-output.rst @@ -21,7 +21,6 @@ The tutorial consists of the following steps: - Compare performance of the FP32 and quantized models. - Run optimized model inference on video - **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -60,20 +59,12 @@ Prerequisites .. code:: ipython3 - import platform - - %pip install -q "openvino>=2023.3.0" "nncf>=2.8.1" "opencv-python" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "openvino>=2023.3.0" "nncf>=2.8.1" "opencv-python" "matplotlib>=3.4" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -100,9 +91,9 @@ Prerequisites Cloning into 'yolov9'... remote: Enumerating objects: 781, done. remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1) - Receiving objects: 100% (781/781), 3.27 MiB | 10.53 MiB/s, done. - Resolving deltas: 100% (330/330), done. 
- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 + Receiving objects: 100% (781/781), 3.27 MiB | 16.41 MiB/s, done. + Resolving deltas: 100% (331/331), done. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 Get PyTorch model @@ -140,7 +131,7 @@ applicable for other models from YOLO V9 family. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/model/gelan-c.pt') @@ -195,9 +186,11 @@ using ``ov.save_model``. .. parsed-literal:: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/experimental.py:243: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature. + ckpt = torch.load(attempt_download(w), map_location='cpu') # load Fusing layers... Model summary: 387 layers, 25288768 parameters, 0 gradients, 102.1 GFLOPs - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/790/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9/models/yolo.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif self.dynamic or self.shape != shape: @@ -578,10 +571,10 @@ asymmetric quantization of activations. .. parsed-literal:: - 2024-10-08 06:55:28.833031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-08 06:55:28.867508: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-05 05:37:29.322072: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-05 05:37:29.357249: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-08 06:55:29.471960: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-05 05:37:29.968269: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -669,18 +662,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.27 ms + [ INFO ] Read model took 26.54 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [?,3,?,?] @@ -692,7 +685,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 7.74 ms + [ INFO ] Reshape model took 8.17 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -702,7 +695,7 @@ models. 
[ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 487.17 ms + [ INFO ] Compile model took 499.10 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -739,17 +732,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 173.19 ms + [ INFO ] First inference took 170.42 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 222 iterations - [ INFO ] Duration: 15646.59 ms + [ INFO ] Duration: 15614.22 ms [ INFO ] Latency: - [ INFO ] Median: 412.14 ms - [ INFO ] Average: 418.38 ms - [ INFO ] Min: 285.25 ms - [ INFO ] Max: 798.40 ms - [ INFO ] Throughput: 14.19 FPS + [ INFO ] Median: 412.60 ms + [ INFO ] Average: 419.41 ms + [ INFO ] Min: 210.55 ms + [ INFO ] Max: 885.97 ms + [ INFO ] Throughput: 14.22 FPS .. code:: ipython3 @@ -763,18 +756,18 @@ models. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.5.0-16913-890f2e12c98 + [ INFO ] Build ................................. 2024.5.0-16993-9c432a3641a [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 41.47 ms + [ INFO ] Read model took 41.88 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: x) : f32 / [...] / [1,3,640,640] @@ -786,7 +779,7 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 0.04 ms + [ INFO ] Reshape model took 0.05 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: x) : u8 / [N,C,H,W] / [1,3,640,640] @@ -796,7 +789,7 @@ models. [ INFO ] xi.3 (node: __module.model.22/aten::cat/Concat_1) : f32 / [...] / [1,144,40,40] [ INFO ] xi (node: __module.model.22/aten::cat/Concat) : f32 / [...] / [1,144,20,20] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 870.18 ms + [ INFO ] Compile model took 946.19 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -833,17 +826,17 @@ models. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 71.43 ms + [ INFO ] First inference took 68.41 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 726 iterations - [ INFO ] Duration: 15150.76 ms + [ INFO ] Duration: 15191.13 ms [ INFO ] Latency: - [ INFO ] Median: 121.03 ms - [ INFO ] Average: 124.70 ms - [ INFO ] Min: 65.35 ms - [ INFO ] Max: 268.30 ms - [ INFO ] Throughput: 47.92 FPS + [ INFO ] Median: 121.43 ms + [ INFO ] Average: 125.10 ms + [ INFO ] Min: 56.89 ms + [ INFO ] Max: 305.84 ms + [ INFO ] Throughput: 47.79 FPS Run Live Object Detection diff --git a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png index 8d7867fc6e5b5a..ccb15ab8382b86 100644 --- a/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png +++ b/docs/notebooks/yolov9-optimization-with-output_files/yolov9-optimization-with-output_36_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:071b74de5f9e1088f4d187a6c3d339cb90758fe7d12d7b7de495fd5266e50946 -size 499096 +oid sha256:13788e6c93a735cf601513bf18c5e6cae29819bdff2bd506fa90a4d34ae4bcc5 +size 496199 diff --git a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py index bb26683cd9e579..c82e0a8d5995f7 100644 --- a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py +++ b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py @@ -122,7 +122,7 @@ def process_coveo_meta(meta, url, link): namespace_element = ET.SubElement(url, namespace) for tag_name, tag_value in values.items(): - if tag_name == 'ovcategory': + if tag_name == 'ovdoctype': processed_link = process_link(link) ET.SubElement(namespace_element, tag_name).text = processed_link else: diff --git 
a/docs/openvino_sphinx_theme/README.md b/docs/openvino_sphinx_theme/README.md index 7931c481c308aa..2e82fa06e8c185 100644 --- a/docs/openvino_sphinx_theme/README.md +++ b/docs/openvino_sphinx_theme/README.md @@ -4,7 +4,8 @@ 1. Install the `openvino_sphinx_theme` using `python`: ``` -python setup.py install --user +cd openvino/docs/openvino_sphinx_theme +python -m pip install --user . ``` 2. Update the `html_theme` variable in your `conf.py`: diff --git a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html index 25acb3c1e5cbda..a2ab53c6a57a83 100644 --- a/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html +++ b/docs/openvino_sphinx_theme/openvino_sphinx_theme/templates/layout.html @@ -28,7 +28,7 @@ {# The data-cfasync attribute disables CloudFlare's Rocket loader so that #} {# mode/theme are correctly set before the browser renders the page. #} {# https://github.com/pydata/pydata-sphinx-theme/pull/1045 #} - + - - - - + + - - - - {% endblock %} - {% block docs_navbar %} {{ super() }} {% include 'baner.html' %} diff --git a/docs/sphinx_setup/_templates/search.html b/docs/sphinx_setup/_templates/search.html index 3519f6e7e02f19..5430f24f74aa8c 100644 --- a/docs/sphinx_setup/_templates/search.html +++ b/docs/sphinx_setup/_templates/search.html @@ -2,133 +2,100 @@ {% set title = _('Search') %} {%- block content %} - {% block docs_navbar %} - {{ super() }} - {% include 'baner.html' %} - {% endblock %} +{% block docs_navbar %} +{{ super() }} +{% include 'baner.html' %} +{% endblock %} - {% block body %} - - - - - - - - - - - -
- - -
- - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- {% endblock %} - - {%- block scripts_end %} - {{ _webpack.body_post() }} - {%- endblock %} +{% block body %} + + + + + + + + +
+ + +
+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+
+{% endblock %} +{%- block scripts_end %} +{{ _webpack.body_post() }} {%- endblock %} + +{%- endblock %} \ No newline at end of file diff --git a/docs/sphinx_setup/api/ie_python_api/api.rst b/docs/sphinx_setup/api/ie_python_api/api.rst index efbe25ee40bcde..6fc754da1246b7 100644 --- a/docs/sphinx_setup/api/ie_python_api/api.rst +++ b/docs/sphinx_setup/api/ie_python_api/api.rst @@ -119,6 +119,12 @@ OpenVINO Python API openvino.runtime.opset15 +.. autosummary:: + :toctree: _autosummary + :template: custom-module-template.rst + + openvino.runtime.opset16 + .. autosummary:: :toctree: _autosummary :template: custom-module-template.rst diff --git a/docs/sphinx_setup/assets/versions_raw.js b/docs/sphinx_setup/assets/versions_raw.js new file mode 100644 index 00000000000000..8045057450bf5f --- /dev/null +++ b/docs/sphinx_setup/assets/versions_raw.js @@ -0,0 +1 @@ +var data='[{"version": "2024"}, {"version": "2023.3"}, {"version": "2022.3"}, {"version": "nightly"}, {"version": "archives"}]'; \ No newline at end of file diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index 148309ccbafe96..01c74de0175bcf 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -84,7 +84,7 @@ ov_sitemap_meta = [ ('coveo:metadata', { 'ovversion': version_name, - 'ovcategory': 'null' + 'ovdoctype': 'null' }) ] @@ -193,7 +193,6 @@ 'css/textfield.css', 'css/tabs.css', 'css/coveo_custom.css', - 'https://static.cloud.coveo.com/atomic/v2/themes/coveo.css', 'https://cdn.jsdelivr.net/npm/@splidejs/splide@4.1.4/dist/css/splide.min.css', ] diff --git a/docs/sphinx_setup/index.rst b/docs/sphinx_setup/index.rst index a65b5b5892c560..9d376877b51d08 100644 --- a/docs/sphinx_setup/index.rst +++ b/docs/sphinx_setup/index.rst @@ -124,11 +124,11 @@ Places to Begin Cloud-ready deployments for microservice applications. - .. button-link:: openvino-workflow/running-inference.html + .. 
button-link:: openvino-workflow/model-server/ovms_what_is_openvino_model_server.html :color: primary :outline: - Try it out + Check out Model Server .. grid-item-card:: Model Compression :img-top: ./assets/images/home_begin_tile_06.png diff --git a/samples/cpp/benchmark_app/benchmark_app.hpp b/samples/cpp/benchmark_app/benchmark_app.hpp index 99cbd7edff8856..cf38ff6708ad29 100644 --- a/samples/cpp/benchmark_app/benchmark_app.hpp +++ b/samples/cpp/benchmark_app/benchmark_app.hpp @@ -65,6 +65,12 @@ static const char cache_dir_message[] = "Optional. Enables caching of loaded mod static const char load_from_file_message[] = "Optional. Loads model from file directly without read_model." " All CNNNetwork options (like re-shape) will be ignored"; +/// @brief message for maximum inference rate +static const char maximum_inference_rate_message[] = + "Optional. Maximum inference rate by frame per second" + "If not specified, default value is 0, the inference will run at maximium rate depending on a device capabilities. " + "Tweaking this value allow better accuracy in power usage measurement by limiting the execution."; + /// @brief message for execution time static const char execution_time_message[] = "Optional. 
Time in seconds to execute topology."; @@ -307,6 +313,9 @@ DEFINE_string(api, "async", api_message); /// @brief Number of infer requests in parallel DEFINE_uint64(nireq, 0, infer_requests_count_message); +/// @brief Execute infer requests at a fixed frequency +DEFINE_double(max_irate, 0, maximum_inference_rate_message); + /// @brief Number of streams to use for inference on the CPU (also affects Hetero cases) DEFINE_string(nstreams, "", infer_num_streams_message); @@ -388,6 +397,7 @@ static void show_usage() { std::cout << " -hint (latency or throughput or cumulative_throughput or none) " << hint_message << std::endl; std::cout << " -niter " << iterations_count_message << std::endl; + std::cout << " -max_irate \"\" " << maximum_inference_rate_message << std::endl; std::cout << " -t " << execution_time_message << std::endl; std::cout << std::endl; std::cout << "Input shapes" << std::endl; diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index 2cfd15b77afb6e..1f1b89c2427e67 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,10 @@ bool parse_and_check_command_line(int argc, char* argv[]) { if (FLAGS_api != "async" && FLAGS_api != "sync") { throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value."); } + if (FLAGS_api == "sync" && FLAGS_nireq > FLAGS_niter) { + throw std::logic_error( + "Number of iterations should be greater than number of infer requests when using sync API."); + } if (!FLAGS_hint.empty() && FLAGS_hint != "throughput" && FLAGS_hint != "tput" && FLAGS_hint != "latency" && FLAGS_hint != "cumulative_throughput" && FLAGS_hint != "ctput" && FLAGS_hint != "none") { throw std::logic_error("Incorrect performance hint. 
Please set -hint option to" @@ -1153,6 +1158,12 @@ int main(int argc, char* argv[]) { execTime = std::chrono::duration_cast(Time::now() - startTime).count(); processedFramesN += batchSize; + + if (FLAGS_max_irate > 0) { + auto nextRunFinishTime = 1 / FLAGS_max_irate * processedFramesN * 1.0e9; + std::this_thread::sleep_for( + std::chrono::nanoseconds(static_cast(nextRunFinishTime - execTime))); + } } // wait the latest inference executions diff --git a/samples/cpp/common/utils/src/args_helper.cpp b/samples/cpp/common/utils/src/args_helper.cpp index f4a3d10ceb0b5b..ba58f98e498e90 100644 --- a/samples/cpp/common/utils/src/args_helper.cpp +++ b/samples/cpp/common/utils/src/args_helper.cpp @@ -29,8 +29,7 @@ void readInputFilesArguments(std::vector& files, const std::string& arg) { struct stat sb; if (stat(arg.c_str(), &sb) != 0) { - slog::warn << "File " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " file or directory not found."); } if (S_ISDIR(sb.st_mode)) { struct CloseDir { @@ -43,17 +42,20 @@ void readInputFilesArguments(std::vector& files, const std::string& using Dir = std::unique_ptr; Dir dp(opendir(arg.c_str())); if (dp == nullptr) { - slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " directory cannot be opened!"); } struct dirent* ep; + size_t files_size = files.size(); while (nullptr != (ep = readdir(dp.get()))) { std::string fileName = ep->d_name; if (fileName == "." 
|| fileName == "..") continue; files.push_back(arg + "/" + ep->d_name); } + if (files.size() == files_size) { + throw std::invalid_argument("No files were found in directory " + arg); + } } else { files.push_back(arg); } diff --git a/samples/js/node/.gitignore b/samples/js/node/.gitignore index 2ab910508d5529..c836a5862da256 100644 --- a/samples/js/node/.gitignore +++ b/samples/js/node/.gitignore @@ -1,2 +1,3 @@ node_modules -hello_reshape_ssd/out.jpg +out*.jpg +output/ diff --git a/samples/js/node/README.md b/samples/js/node/README.md index 9e69769778d8a8..2fd4dbe76e5900 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -7,13 +7,15 @@ To run samples, install dependencies first. In current directory run: npm install ``` -Note: Perform these steps also before running notebooks. +Note: Perform this step also before running notebooks. ## Samples - hello_classification - hello_reshape_ssd - classification_sample_async + - optical_character_recognition + - vision_background_removal ## Notebooks diff --git a/samples/js/node/classification_sample_async/README.md b/samples/js/node/classification_sample_async/README.md index 0b19e908587505..6a0dcba6d16e02 100644 --- a/samples/js/node/classification_sample_async/README.md +++ b/samples/js/node/classification_sample_async/README.md @@ -4,7 +4,12 @@ Models with only 1 input and output are supported. 
Run: ```bash -node classification_sample_async.js -m *path_to_model_file* -i *path_to_img1* -i *path_to_img2* -d AUTO +node classification_sample_async.js -m ../../assets/models/v3-small_224_1.0_float.xml -i ../../assets/images/coco.jpg -i ../../assets/images/coco_hollywood.jpg -d AUTO +``` + +Where +```bash +node classification_sample_async.js -m *path_to_model_file* -i *path_to_img1* -i *path_to_img2* -d *device* ``` Other details see in [../../../python/classification_sample_async/README.md](../../../python/classification_sample_async/README.md) diff --git a/samples/js/node/classification_sample_async/classification_sample_async.js b/samples/js/node/classification_sample_async/classification_sample_async.js index 94f4d7828c95c2..96724be80d3b0b 100644 --- a/samples/js/node/classification_sample_async/classification_sample_async.js +++ b/samples/js/node/classification_sample_async/classification_sample_async.js @@ -1,39 +1,30 @@ const { addon: ov } = require('openvino-node'); const args = require('args'); -const { cv } = require('opencv-wasm'); -const { getImageData } = require('../helpers.js'); - -args.options([{ - name: 'img', - defaultValue: [], -}, { - name: 'model', -}, { - name: 'device', -}]); -const { model: modelPath, device: deviceName, img: images } = - args.parse(process.argv); - -main(modelPath, images, deviceName); - -function completionCallback(result, imagePath) { - const predictions = Array.from(result.data) - .map((prediction, classId) => ({ prediction, classId })) - .sort(({ prediction: predictionA }, { prediction: predictionB }) => - predictionA === predictionB ? 0 : predictionA > predictionB ? 
-1 : 1); - - console.log(`Image path: ${imagePath}`); - console.log('Top 10 results:'); - console.log('class_id probability'); - console.log('--------------------'); - predictions.slice(0, 10).forEach(({ classId, prediction }) => - console.log(`${classId}\t ${prediction.toFixed(7)}`), - ); - console.log(); -} - -async function main(modelPath, images, deviceName) { +const Image = require('../image.js'); +const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json'); + +args.options([ + { + name: 'img', + defaultValue: [], + }, + { + name: 'model', + }, + { + name: 'device', + }, +]); +const { + model: modelPath, + device: deviceName, + img: imgPaths +} = args.parse(process.argv); + +main(modelPath, imgPaths, deviceName); + +async function main(modelPath, imgPaths, deviceName) { //----------- Step 1. Initialize OpenVINO Runtime Core ----------------------- console.log('Creating OpenVINO Runtime Core'); const core = new ov.Core(); @@ -42,8 +33,6 @@ async function main(modelPath, images, deviceName) { console.log(`Reading the model: ${modelPath}`); // (.xml and .bin files) or (.onnx file) const model = await core.readModel(modelPath); - const [h, w] = model.inputs[0].shape.slice(-2); - const tensorShape = [1, h, w, 3]; if (model.inputs.length !== 1) throw new Error('Sample supports only single input topologies'); @@ -52,56 +41,65 @@ async function main(modelPath, images, deviceName) { throw new Error('Sample supports only single output topologies'); //----------- Step 3. Set up input ------------------------------------------- - // Read input image - const imagesData = []; - - for (const imagePath of images) - imagesData.push(await getImageData(imagePath)); + const inputImages = []; + const [, inputHeight, inputWidth] = model.inputs[0].getShape(); - const preprocessedImages = imagesData.map((imgData) => { - // Use opencv-wasm to preprocess image. 
- const originalImage = cv.matFromImageData(imgData); - const image = new cv.Mat(); - // The MobileNet model expects images in RGB format. - cv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB); - cv.resize(image, image, new cv.Size(w, h)); + // Read input image, resize it to the model's input size and convert it to a tensor. + for (const path of imgPaths) { + const img = await Image.load(path); + const resized = img.resize(inputWidth, inputHeight); - return new Uint8Array(image.data); - }); + inputImages.push(resized); + } //----------- Step 4. Apply preprocessing ------------------------------------ const _ppp = new ov.preprocess.PrePostProcessor(model); _ppp.input().tensor().setLayout('NHWC').setElementType(ov.element.u8); - _ppp.input().model().setLayout('NCHW'); + _ppp.input().model().setLayout('NHWC'); _ppp.output().tensor().setElementType(ov.element.f32); _ppp.build(); - //----------------- Step 5. Loading model to the device ---------------------- + //----------- Step 5. Loading model to the device ---------------------------- console.log('Loading the model to the plugin'); const compiledModel = await core.compileModel(model, deviceName); const outputName = compiledModel.output(0).toString(); - //----------- Step 6. Collecting promises to react when they resolve --------- - console.log('Starting inference in asynchronous mode'); + //----------- Step 6. 
Do inference ------------------------------------------- + console.log('Starting inference\n'); // Create infer request const inferRequest = compiledModel.createInferRequest(); - - const promises = preprocessedImages.map((tensorData, i) => { - const inferPromise = inferRequest.inferAsync([ - new ov.Tensor(ov.element.u8, tensorShape, tensorData) - ]); + const promises = inputImages.map((img, i) => { + const inferPromise = inferRequest.inferAsync([img.toTensor()]); inferPromise.then(result => - completionCallback(result[outputName], images[i])); + completionCallback(result[outputName], imgPaths[i])); return inferPromise; }); - //----------- Step 7. Do inference ------------------------------------------- + //----------- Step 7. Wait till all inferences execute ----------------------- await Promise.all(promises); console.log('All inferences executed'); console.log('\nThis sample is an API example, for any performance ' + 'measurements please use the dedicated benchmark_app tool'); } + +function completionCallback(result, imagePath) { + const predictions = Array.from(result.data) + .map((prediction, classId) => ({ prediction, classId })) + .sort(({ prediction: predictionA }, { prediction: predictionB }) => + predictionA === predictionB ? 0 : predictionA > predictionB ? 
-1 : 1); + + const imagenetClasses = ['background', ...Object.values(imagenetClassesMap)]; + + console.log(`Image path: ${imagePath}`); + console.log('Top 5 results:\n'); + console.log('id\tprobability\tlabel'); + console.log('---------------------------------'); + predictions.slice(0, 5).forEach(({ classId, prediction }) => + console.log(`${classId}\t${prediction.toFixed(7)}\t${imagenetClasses[classId][1]}`), + ); + console.log(); +} diff --git a/samples/js/node/fetch-samples-assets.js b/samples/js/node/fetch-samples-assets.js new file mode 100644 index 00000000000000..33dd509a922f85 --- /dev/null +++ b/samples/js/node/fetch-samples-assets.js @@ -0,0 +1,83 @@ +const { downloadFile } = require('./helpers.js'); + +const host = 'https://storage.openvinotoolkit.org'; + +const models = [ + // hello classification + '/repositories/openvino_notebooks/models/mobelinet-v3-tf/FP32/v3-small_224_1.0_float.xml', + '/repositories/openvino_notebooks/models/mobelinet-v3-tf/FP32/v3-small_224_1.0_float.bin', + + // hello reshape ssd + '/repositories/open_model_zoo/2022.3/models_bin/1/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.xml', + '/repositories/open_model_zoo/2022.3/models_bin/1/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.bin', + + // hello detection, optical character recognition + '/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.xml', + '/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.bin', + + '/repositories/open_model_zoo/public/text-recognition-resnet-fc/text-recognition-resnet-fc.xml', + '/repositories/open_model_zoo/public/text-recognition-resnet-fc/text-recognition-resnet-fc.bin', + + // vision background removal + '/repositories/open_model_zoo/public/vision-background-removal/unet_ir_model.xml', + '/repositories/open_model_zoo/public/vision-background-removal/unet_ir_model.bin', + + // pose estimation 
+ '/repositories/open_model_zoo/2022.1/models_bin/3/human-pose-estimation-0001/FP16-INT8/human-pose-estimation-0001.xml', + '/repositories/open_model_zoo/2022.1/models_bin/3/human-pose-estimation-0001/FP16-INT8/human-pose-estimation-0001.bin', + + // question answering + '/repositories/open_model_zoo/2022.3/models_bin/1/bert-small-uncased-whole-word-masking-squad-0001/FP16/bert-small-uncased-whole-word-masking-squad-0001.xml', +]; +const modelsDir = __dirname + '/../assets/models'; + + +const images = [ + // hello classification + '/repositories/openvino_notebooks/data/data/image/coco.jpg', + + // hello reshape ssd + '/repositories/openvino_notebooks/data/data/image/empty_road_mapillary.jpg', + + // hello detection, optical character recognition, pose estimation + '/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg', + + // vision background removal + '/repositories/openvino_notebooks/data/data/image/coco_hollywood.jpg', + '/repositories/openvino_notebooks/data/data/image/wall.jpg', +]; +const imagesDir = __dirname + '/../assets/images'; + +const datasets = [ + // hello classification + '/repositories/openvino_notebooks/data/data/datasets/imagenet/imagenet_class_index.json', +]; +const datasetsDir = __dirname + '/../assets/datasets'; + +const vocab = [ + '/repositories/openvino_notebooks/data/data/text/bert-uncased/vocab.txt', +]; +const vocabDir = __dirname + '/../assets/vocab'; + +try { + main(); +} catch(error) { + console.error('Error Occurred', error); +} + +async function main() { + await downloadAssets(models, modelsDir); + await downloadAssets(images, imagesDir); + await downloadAssets(datasets, datasetsDir); + await downloadAssets(vocab, vocabDir); +} + +async function downloadAssets(links, destinationDir) { + for (const link of links) { + const url = host + link; + const filename = link.split('/').pop(); + + await downloadFile(url, filename, destinationDir); + console.log(`Downloaded: ${filename} \n`); + } +} diff --git 
a/samples/js/node/hello_classification/README.md b/samples/js/node/hello_classification/README.md index 2de983af58334b..1d20bba8ac63e6 100644 --- a/samples/js/node/hello_classification/README.md +++ b/samples/js/node/hello_classification/README.md @@ -2,9 +2,13 @@ Models with only 1 input and output are supported. -Run: +Run sample: ```bash -node hello_classification.js *path_to_model_file* *path_to_img* AUTO +node hello_classification.js ../../assets/models/v3-small_224_1.0_float.xml ../../assets/images/coco.jpg AUTO +``` +Where +```bash +node hello_classification.js *path_to_model_file* *path_to_img* *device* ``` Other details see in [../../../python/hello_classification/README.md](../../../python/hello_classification/README.md) diff --git a/samples/js/node/hello_classification/hello_classification.js b/samples/js/node/hello_classification/hello_classification.js index 00ba868e40f3a0..9edbc998d006bd 100644 --- a/samples/js/node/hello_classification/hello_classification.js +++ b/samples/js/node/hello_classification/hello_classification.js @@ -1,7 +1,7 @@ const { addon: ov } = require('openvino-node'); -const { cv } = require('opencv-wasm'); -const { getImageData } = require('../helpers.js'); +const Image = require('../image.js'); +const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json'); // Parsing and validation of input arguments if (process.argv.length !== 5) @@ -31,21 +31,12 @@ async function main(modelPath, imagePath, deviceName) { //----------------- Step 3. Set up input ------------------------------------- // Read input image - const imgData = await getImageData(imagePath); - - // Use opencv-wasm to preprocess image. - const originalImage = cv.matFromImageData(imgData); - const image = new cv.Mat(); - // The MobileNet model expects images in RGB format. 
- cv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB); - - const tensorData = new Float32Array(image.data); - const shape = [1, image.rows, image.cols, 3]; - const inputTensor = new ov.Tensor(ov.element.f32, shape, tensorData); + const img = await Image.load(imagePath); + const inputTensor = img.toTensor(); //----------------- Step 4. Apply preprocessing ------------------------------ const _ppp = new ov.preprocess.PrePostProcessor(model); - _ppp.input().tensor().setShape(shape).setLayout('NHWC'); + _ppp.input().tensor().setElementType(ov.element.u8).setShape(inputTensor.getShape()).setLayout('NHWC'); _ppp.input().preprocess().resize(ov.preprocess.resizeAlgorithm.RESIZE_LINEAR); _ppp.input().model().setLayout('NHWC'); _ppp.output().tensor().setElementType(ov.element.f32); @@ -69,12 +60,14 @@ async function main(modelPath, imagePath, deviceName) { .sort(({ prediction: predictionA }, { prediction: predictionB }) => predictionA === predictionB ? 0 : predictionA > predictionB ? -1 : 1); + const imagenetClasses = ['background', ...Object.values(imagenetClassesMap)]; + console.log(`Image path: ${imagePath}`); - console.log('Top 10 results:'); - console.log('class_id probability'); - console.log('--------------------'); + console.log('Top 10 results:\n'); + console.log('id\tprobability\tlabel'); + console.log('---------------------------------'); predictions.slice(0, 10).forEach(({ classId, prediction }) => - console.log(`${classId}\t ${prediction.toFixed(7)}`), + console.log(`${classId}\t${prediction.toFixed(7)}\t${imagenetClasses[classId][1]}`), ); console.log('\nThis sample is an API example, for any performance ' diff --git a/samples/js/node/hello_reshape_ssd/README.md b/samples/js/node/hello_reshape_ssd/README.md index 21d8be8ec4b50d..547cf989478a87 100644 --- a/samples/js/node/hello_reshape_ssd/README.md +++ b/samples/js/node/hello_reshape_ssd/README.md @@ -2,9 +2,13 @@ Models with only 1 input and output are supported. 
-Run: +Run sample: ```bash -node hello_reshape_ssd.js *path_to_model_file* *path_to_img* AUTO +node hello_reshape_ssd.js ../../assets/models/road-segmentation-adas-0001.xml ../../assets/images/empty_road_mapillary.jpg AUTO +``` +Where +```bash +node hello_reshape_ssd.js *path_to_model_file* *path_to_img* *device* ``` Other details see in [../../../python/hello_reshape_ssd/README.md](../../../python/hello_reshape_ssd/README.md) diff --git a/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js b/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js index 416e8bc8180668..ebcc8e83223853 100644 --- a/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js +++ b/samples/js/node/hello_reshape_ssd/hello_reshape_ssd.js @@ -1,13 +1,5 @@ const { addon: ov } = require('openvino-node'); - -const fs = require('node:fs/promises'); -const { cv } = require('opencv-wasm'); -const { - setShape, - getImageData, - getImageBuffer, - arrayToImageData, -} = require('../helpers.js'); +const Image = require('../image.js'); // Parsing and validation of input arguments if (process.argv.length !== 5) @@ -38,24 +30,15 @@ async function main(modelPath, imagePath, deviceName) { //----------------- Step 3. Set up input ------------------------------------- // Read input image - const imgData = await getImageData(imagePath); - - // Use opencv-wasm to preprocess image. - const originalImage = cv.matFromImageData(imgData); - const image = new cv.Mat(); - // The MobileNet model expects images in RGB format. - cv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB); - - const tensorData = new Uint8Array(image.data); - const shape = [1, image.rows, image.cols, 3]; - const inputTensor = new ov.Tensor(ov.element.u8, shape, tensorData); + const img = await Image.load(imagePath); + const inputTensor = img.toTensor(); //----------------- Step 4. 
Apply preprocessing ------------------------------ const _ppp = new ov.preprocess.PrePostProcessor(model); _ppp.input().preprocess().resize(ov.preprocess.resizeAlgorithm.RESIZE_LINEAR); _ppp.input().tensor() - .setShape(shape) + .setShape(inputTensor.getShape()) .setElementType(ov.element.u8) .setLayout('NHWC'); @@ -70,48 +53,51 @@ async function main(modelPath, imagePath, deviceName) { //---------------- Step 6. Create infer request and do inference synchronously console.log('Starting inference in synchronous mode'); const inferRequest = compiledModel.createInferRequest(); - inferRequest.setInputTensor(inputTensor); - inferRequest.infer(); + const outputs = inferRequest.infer([inputTensor]); //----------------- Step 7. Process output ----------------------------------- const outputLayer = compiledModel.outputs[0]; - const resultInfer = inferRequest.getTensor(outputLayer); - const predictions = Array.from(resultInfer.data); - const [height, width] = [originalImage.rows, originalImage.cols]; - - const detections = setShape(predictions, [100, 7]); - const color = [255, 0, 0, 255]; - const THROUGHPUT = 0.9; - - detections.forEach(detection => { - const [classId, confidence, xmin, ymin, xmax, ymax] = detection.slice(1); - - if (confidence < THROUGHPUT) return; - - console.log(`Found: classId = ${classId}, ` - + `confidence = ${confidence.toFixed(2)}, ` - + `coords = (${xmin}, ${ymin}), (${xmax}, ${ymax})`, - ); - - // Draw a bounding box on a output image - cv.rectangle(originalImage, - new cv.Point(xmin*width, ymin*height), - new cv.Point(xmax*width, ymax*height), - color, - 2, - ); - }); - - const resultImgData = arrayToImageData(originalImage.data, width, height); + const output = outputs[outputLayer]; + const outputData = output.data; + const resultLayer = []; + const colormap = [ + [68, 1, 84, 255], + [48, 103, 141, 255], + [53, 183, 120, 255], + [199, 216, 52, 255], + ]; + const size = outputData.length/4; + + for (let i = 0; i < size; i++) { + const valueAt = 
(i, number) => outputData[i + number*size]; + const currentValues = { + bg: valueAt(i, 0), + c: valueAt(i, 1), + h: valueAt(i, 2), + w: valueAt(i, 3), + }; + const values = Object.values(currentValues); + const maxIndex = values.indexOf(Math.max(...values)); + + resultLayer.push(maxIndex); + } + + const pixels = []; + resultLayer.forEach(i => pixels.push(...colormap[i])); + + const alpha = 0.6; const filename = 'out.jpg'; + const [, , H, W] = output.getShape(); - await fs.writeFile(`./${filename}`, getImageBuffer(resultImgData)); + const segmentsImg = Image.fromArray(pixels, W, H); + const resizedSegments = segmentsImg.resize(img.width, img.height); + const mergedImg = Image.overlay(img, resizedSegments, alpha); try { - await fs.readFile(filename); - console.log('Image out.jpg was created!'); + await mergedImg.save(filename); + console.log(`Image '${filename}' was created.`); } catch(err) { - console.log(`Image ${filename} was not created. Check your permissions.`); + console.log(`Image '${filename}' was not created. 
Check your permissions.`); } console.log('\nThis sample is an API example, for any performance ' diff --git a/samples/js/node/helpers.js b/samples/js/node/helpers.js index 5cbd6650495070..5d56d4c60a7139 100644 --- a/samples/js/node/helpers.js +++ b/samples/js/node/helpers.js @@ -1,17 +1,8 @@ const path = require('node:path'); -const { cv } = require('opencv-wasm'); const { createWriteStream } = require('node:fs'); const { mkdir, stat } = require('node:fs/promises'); const { HttpsProxyAgent } = require('https-proxy-agent'); -const { - Image, - ImageData, - loadImage, - createCanvas, - createImageData, -} = require('canvas'); - module.exports = { exp, sum, @@ -23,96 +14,29 @@ module.exports = { setShape, transform, downloadFile, - displayImage, - getImageData, extractValues, - getImageBuffer, - arrayToImageData, - displayArrayAsImage, matrixMultiplication, }; -function arrayToImageData(array, width, height) { - return createImageData(new Uint8ClampedArray(array), width, height); -} - -function getImageBuffer(imageOrImageData) { - const canvas = createCanvas(imageOrImageData.width, imageOrImageData.height); - const ctx = canvas.getContext('2d'); - - if (imageOrImageData instanceof Image) - ctx.drawImage(imageOrImageData, 0, 0); - else if (imageOrImageData instanceof ImageData) - ctx.putImageData(imageOrImageData, 0, 0); - else - throw Error(`Passed parameters has type '${typeof imageOrImageData}'. 
` - + 'It is\'t supported.'); - - return canvas.toBuffer('image/jpeg'); -} - -function displayImage(imageOrImageData, display) { - const buffer = getImageBuffer(imageOrImageData); - - display.image(buffer); -} - -function displayArrayAsImage(arr, width, height, display) { - const alpha = 255; - const componentsPerPixel = arr.length / (width*height); - - try { - switch (componentsPerPixel) { - case 1: - arr = arr.reduce((acc, val) => { - acc.push(val, val, val, alpha); - - return acc; - }, []); - break; - - case 3: - arr = arr.reduce((acc, val, index) => { - if (index && index%3 === 0) acc.push(alpha); - - acc.push(val); - - return acc; - }, []); - break; - } - } catch(e) { - console.log(e); - } - - const imageData = arrayToImageData(arr, width, height); - - displayImage(imageData, display); -} - -async function getImageData(path) { - const image = await loadImage(path); - const { width, height } = image; - - const canvas = await createCanvas(width, height); - const ctx = canvas.getContext('2d'); - - ctx.drawImage(image, 0, 0); - - return ctx.getImageData(0, 0, width, height); -} - function transform(arr, { width, height }, order) { - const img = new cv.Mat(height, width, cv.CV_8UC3); - - img.data.set(arr, 0, arr.length); + // Calculate the number of pixels and the size of each channel + const numPixels = width * height; + const channels = [[], [], []]; + + // Separate RGB channels + for (let i = 0; i < numPixels; i++) { + channels[0].push(arr[i * 3]); // Red channel + channels[1].push(arr[i * 3 + 1]); // Green channel + channels[2].push(arr[i * 3 + 2]); // Blue channel + } - const channels = new cv.MatVector(); - cv.split(img, channels); + // Reorder channels based on the 'order' array + const reorderedChannels = order.map(num => channels[num]); - const val = order.map(num => [...channels.get(num).data]); + // Flatten reordered channels into a single array + const result = reorderedChannels.flat(); - return [].concat(...val); + return result; } async function 
downloadFile(url, filename, destination) { diff --git a/samples/js/node/image.js b/samples/js/node/image.js new file mode 100644 index 00000000000000..30ac35032d7ed7 --- /dev/null +++ b/samples/js/node/image.js @@ -0,0 +1,275 @@ +const { + ImageData, + loadImage, + createCanvas, +} = require('@napi-rs/canvas'); +const path = require('node:path'); +const fs = require('node:fs/promises'); +const { addon: ov } = require('openvino-node'); + +const codeENOENT = 'ENOENT'; + +class OvImage { + constructor(imageData) { + this.imageData = imageData; + this.channels = imageData.data.length / (this.width*this.height); + } + + get width() { + return this.imageData.width; + } + + get height() { + return this.imageData.height; + } + + get rgb() { + return this.imageData.data.filter((_, index) => index % 4 !== 3); + } + + get rgba() { + return this.imageData.data; + } + + get grayscale() { + const grayData = new Uint8ClampedArray(this.width * this.height); + + for (let i = 0; i < this.imageData.data.length; i += 4) { + const [r, g, b] = this.imageData.data.slice(i, i + 3); + const gray = 0.299 * r + 0.587 * g + 0.114 * b; + + grayData[i / 4] = gray; + } + + return grayData; + } + + get canvasCtx() { + const canvas = createCanvas(this.width, this.height); + const ctx = canvas.getContext('2d'); + + ctx.putImageData(this.imageData, 0, 0); + + return ctx; + } + + get buffer() { + return this.canvasCtx.canvas.toBuffer('image/jpeg'); + } + + drawRect(x, y, width, height, properties) { + const ctx = this.canvasCtx; + + ctx.strokeStyle = properties.color || 'red'; + ctx.lineWidth = properties.width || 1; + ctx.strokeRect(x, y, width, height); + + const imageData = ctx.getImageData(0, 0, this.width, this.height); + + return new OvImage(imageData); + } + + drawText(text, x, y, properties) { + const ctx = this.canvasCtx; + + ctx.font = properties.font || '30px Arial'; + ctx.fillStyle = properties.color || 'red'; + ctx.fillText(text, x, y); + + const imageData = ctx.getImageData(0, 0, 
this.width, this.height); + + return new OvImage(imageData); + } + + drawCircle(x, y, radius, properties) { + const ctx = this.canvasCtx; + + ctx.strokeStyle = properties.color || 'red'; + ctx.lineWidth = properties.width || 1; + ctx.beginPath(); + ctx.arc(x, y, radius, 0, 2 * Math.PI); + ctx.stroke(); + + const imageData = ctx.getImageData(0, 0, this.width, this.height); + + return new OvImage(imageData); + } + + drawLine(x1, y1, x2, y2, properties) { + const ctx = this.canvasCtx; + + ctx.strokeStyle = properties.color || 'red'; + ctx.lineWidth = properties.width || 1; + ctx.beginPath(); + ctx.moveTo(x1, y1); + ctx.lineTo(x2, y2); + ctx.stroke(); + + const imageData = ctx.getImageData(0, 0, this.width, this.height); + + return new OvImage(imageData); + } + + toTensor() { + return new ov.Tensor( + ov.element.u8, + [1, this.height, this.width, 3], + new Uint8ClampedArray(this.rgb), + ); + } + + resize(newWidth, newHeight) { + const ctx = this.canvasCtx; + + const canvas2 = createCanvas(newWidth, newHeight); + const ctx2 = canvas2.getContext('2d'); + ctx2.drawImage(ctx.canvas, 0, 0, newWidth, newHeight); + + const imageData = ctx2.getImageData(0, 0, newWidth, newHeight); + + return new OvImage(imageData); + } + + invert() { + const invertedData = this.rgba.map((value, index) => { + if (index % 4 === 3) + return 255; + + return 255 - value; + }); + + return OvImage.fromArray(invertedData, this.width, this.height); + } + + crop(x, y, width, height) { + const canvas2 = createCanvas(width, height); + const ctx2 = canvas2.getContext('2d'); + + ctx2.drawImage(this.canvasCtx.canvas, x, y, width, height, 0, 0, width, height); + + const imageData = ctx2.getImageData(0, 0, width, height); + + return new OvImage(imageData); + } + + async save(filepath) { + const destination = path.dirname(filepath); + + try { + await fs.access(destination); + } catch(error) { + if (error.code !== codeENOENT) throw error; + + await fs.mkdir(destination, { recursive: true }); + } + + return await 
fs.writeFile(filepath, this.buffer); + } + + // Display the image using the node notebook display object + display(display) { + display.image(this.buffer); + } + + static async load(path) { + const image = await loadImage(path); + const { width, height } = image; + + const canvas = await createCanvas(width, height); + const ctx = canvas.getContext('2d'); + + ctx.drawImage(image, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, width, height)); + } + + static fromArray(arr, width, height) { + const canvas = createCanvas(width, height); + const ctx = canvas.getContext('2d'); + + const imageData = new ImageData( + new Uint8ClampedArray(arr), + width, + height, + ); + + ctx.putImageData(imageData, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, width, height)); + } + + static merge(img1, img2) { + if (img1.width !== img2.width || img1.height !== img2.height) + throw new Error('Images should have the same size'); + + const canvas = createCanvas(img1.width, img1.height); + const ctx = canvas.getContext('2d'); + + const img1Data = img1.imageData.data; + const img2Data = img2.imageData.data; + + const mergedData = img1Data.map((_, index) => { + if (index % 4 === 3) + return 255; + + return (img1Data[index] + img2Data[index]); + }); + + const imageData = new ImageData( + new Uint8ClampedArray(mergedData), + img1.width, + img1.height, + ); + + ctx.putImageData(imageData, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, img1.width, img1.height)); + } + + static mask(img1, img2) { + if (img1.width !== img2.width || img1.height !== img2.height) + throw new Error('Images should have the same size'); + + const canvas = createCanvas(img1.width, img1.height); + const ctx = canvas.getContext('2d'); + + const img1Data = img1.imageData.data; + const img2Data = img2.imageData.data; + + const subtractedData = img1Data.map((_, index) => { + if (index % 4 === 3) + return 255; + + return img1Data[index] * (img2Data[index] / 255); + }); + + const imageData = new ImageData( + 
new Uint8ClampedArray(subtractedData), + img1.width, + img1.height, + ); + + ctx.putImageData(imageData, 0, 0); + + return new OvImage(ctx.getImageData(0, 0, img1.width, img1.height)); + } + + static overlay(img1, img2, alpha) { + if (img1.width !== img2.width || img1.height !== img2.height) + throw new Error('Images should have the same size'); + + const img1Data = img1.imageData.data; + const img2Data = img2.imageData.data; + + const overlayedData = img1Data.map((_, index) => { + if (index % 4 === 3) + return 255; + + return img1Data[index] * (1 - alpha) + img2Data[index] * alpha; + }); + + return OvImage.fromArray(overlayedData, img1.width, img1.height); + } +} + +module.exports = OvImage; diff --git a/samples/js/node/notebooks/hello-detection.nnb b/samples/js/node/notebooks/hello-detection.nnb index f6139ec7ec8b98..60640b3bd042ea 100644 --- a/samples/js/node/notebooks/hello-detection.nnb +++ b/samples/js/node/notebooks/hello-detection.nnb @@ -17,57 +17,42 @@ { "language": "typescript", "source": [ - "const { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\nconst { transform, getImageData, displayArrayAsImage, downloadFile } = require('../helpers.js');\n\nconst { addon: ov } = require('openvino-node'); \n" + "const { addon: ov } = require('openvino-node');\nconst { display } = require('node-kernel');\n\nconst Image = require('../image');\nconst { transform, argMax, setShape } = require('../helpers.js');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Download the Model" + "## Load the Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'horizontal-text-detection-0001';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 
'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/';\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" + "const modelXMLPath = '../../assets/models/horizontal-text-detection-0001.xml';\n\n// Initialize OpenVINO core and load the detection model\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO');\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output('boxes');\n" ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/horizontal-text-detection-0001.bin'", - "" - ] - } - ] - } - ] + "outputs": [] }, { "language": "markdown", "source": [ - "## Download an Image" + "## Load an Image" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\n\nawait downloadFile(imgUrl, 'intel_rnb.jpg', baseImagesDir);\n" + "const imagePath = '../../assets/images/intel_rnb.jpg';\nconst img = await Image.load(imagePath);\nimg.display(display);\n\n// Resize the image to meet network input size\nconst [inputHeight, inputWidth] = inputLayer.shape.slice(2);\nconst resizedImg = img.resize(inputWidth, inputHeight);\n\n// Prepare input tensor\nconst inputImageTransformedData = transform(\n resizedImg.rgb,\n { width: inputWidth, height: inputHeight },\n [0, 1, 2],\n);\nconst tensorData = new Float32Array(inputImageTransformedData);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n" ], "outputs": [ { "items": [ { - "mime": "application/vnd.code.notebook.stdout", - "value": 
[ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/images/intel_rnb.jpg'", - "" - ] + "mime": "image/jpeg", + "value": "" } ] } @@ -76,51 +61,28 @@ { "language": "markdown", "source": [ - "## Load the Model" + "## Do Inference" ], "outputs": [] }, { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output('boxes');\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "## Load an Image" + "const inferRequest = compiledModel.createInferRequest();\nconst result = await inferRequest.inferAsync([tensor]);\n" ], "outputs": [] }, - { - "language": "typescript", - "source": [ - "const imgData = await getImageData('../../assets/images/intel_rnb.jpg');\nconst originalImage = cv.matFromImageData(imgData);\nconst { cols: originalWidth, rows: originalHeight } = originalImage;\n\nconst image = new cv.Mat();\nconst resizedImage = new cv.Mat();\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\ncv.cvtColor(image, image, cv.COLOR_BGR2RGB);\n\nconst [B, C, H, W] = inputLayer.shape;\n\ncv.resize(image, resizedImage, new cv.Size(W, H));\n\nconst inputImage = transform(resizedImage.data, { width: W, height: H }, [0, 1, 2]); // NHWC to NCHW\n\ndisplayArrayAsImage(originalImage.data, originalWidth, originalHeight, display);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] - }, { "language": "markdown", "source": [ - "## Do Inference" + "## Initialize helper functions\n" ], "outputs": [] }, { "language": "typescript", "source": [ - "const tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, Int32Array.from(inputLayer.shape), tensorData);\n\nconst inferRequest = 
compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\ninferRequest.infer();\n\nconst output = inferRequest.getTensor(outputLayer);\nconst { data: boxes } = output;\n" + "// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => {\n const position = idx % 2\n ? Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return Math.floor(position);\n }\n\n return box.map(scaleShape);\n}" ], "outputs": [] }, @@ -134,14 +96,14 @@ { "language": "typescript", "source": [ - "// For each detection, the description is in the [x_min, y_min, x_max, y_max, conf] format:\n// The image passed here is in BGR format with changed width and height. 
To display it in colors expected by matplotlib, use cvtColor function\nfunction convertResultToImage(bgrImage, resizedImage, boxes, componentsCount, options) {\n\tconst defaultOptions = { threshold: 0.3, confLabels: true };\n\tconst { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n\t// Define colors for boxes and descriptions.\n\tconst colors = { red: [255, 0, 0, 255], green: [0, 255, 0, 255] };\n\n\t// Fetch the image shapes to calculate a ratio.\n\tconst [realY, realX] = [bgrImage.rows, bgrImage.cols];\n\tconst [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols];\n\n\tconst [ratioX, ratioY] = [realX / resizedX, realY / resizedY];\n\n\t// Convert the base image from BGR to RGB format.\n\tconst rgbImage = new cv.Mat();\n\tcv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB);\n\n\t// Iterate through non-zero boxes.\n\tfor (let i = 0; i < boxes.length; i += componentsCount) {\n\t\tconst box = boxes.slice(i, i + componentsCount);\n\n\t\t// Pick a confidence factor from the last place in an array.\n\t\tconst conf = box[box.length - 1];\n\n\t\tif (conf < threshold) continue;\n\n\t\t// Convert float to int and multiply corner position of each box by x and y ratio.\n\t\t// If the bounding box is found at the top of the image, \n\t\t// position the upper box bar little lower to make it visible on the image. \n\t\tconst [xMin, yMin, xMax, yMax] = box.slice(0, -1).map((cornerPosition, idx) =>\n\t\t\tidx % 2\n\t\t\t\t? 
parseInt(Math.max(cornerPosition * ratioY, 10))\n\t\t\t\t: parseInt(cornerPosition * ratioX)\n\t\t);\n\t\t// Draw a box based on the position, parameters in rectangle function are: image, start_point, end_point, color, thickness.\n\t\tcv.rectangle(rgbImage, new cv.Point(xMin, yMin), new cv.Point(xMax, yMax), colors.green, 2);\n\n\t\t// Add text to the image based on position and confidence.\n\t\t// Parameters in text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type.\n\t\tif (confLabels)\n\t\t\tcv.putText(\n\t\t\t\trgbImage,\n\t\t\t\t`${conf.toFixed(2)}`,\n\t\t\t\tnew cv.Point(xMin, yMin - 10),\n\t\t\t\tcv.FONT_HERSHEY_SIMPLEX,\n\t\t\t\t0.8,\n\t\t\t\tcolors.red,\n\t\t\t\t1,\n\t\t\t\tcv.LINE_AA,\n\t\t\t);\n\t}\n\n\treturn rgbImage;\n}\n\nconst [ __, componentsCount] = output.getShape();\nconst img = convertResultToImage(image, resizedImage, boxes, componentsCount, { confLabels: false });\n\ndisplayArrayAsImage(img.data, originalWidth, originalHeight, display);\n" + "// Calculate ratios\nconst [ratioX, ratioY] = [img.width / inputWidth, img.height / inputHeight];\nconst boundingBoxesArray = extractBoundingBoxes(result[outputLayer]);\n// Resize bounding boxes to the original image size\nconst boundingBoxesOriginalSizeArray = boundingBoxesArray.map(box =>\n [...multiplyByRatio(ratioX, ratioY, box), box[4]]);\n\n// Takes original image and bounding boxes\n// and returns the image with bounding boxes drawn on it\nasync function putBoundingBoxesOnImage(img, boxes, threshold = 0.3) {\n let finalImage = img;\n\n for (const box of boxes) {\n const conf = box[box.length - 1];\n\n if (conf < threshold) continue;\n\n const [xMin, yMin, xMax, yMax] = box;\n\n finalImage = finalImage.drawRect(\n xMin, yMin,\n xMax - xMin, yMax - yMin,\n { color: 'red', width: 3 },\n );\n }\n\n return finalImage;\n}\n\n\nconst resultImg = await putBoundingBoxesOnImage(\n img,\n boundingBoxesOriginalSizeArray,\n);\nresultImg.display(display);\n" 
], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } diff --git a/samples/js/node/notebooks/hello-segmentation.nnb b/samples/js/node/notebooks/hello-segmentation.nnb index 8df4e6227701e9..a7da34a2799edf 100644 --- a/samples/js/node/notebooks/hello-segmentation.nnb +++ b/samples/js/node/notebooks/hello-segmentation.nnb @@ -17,62 +17,10 @@ { "language": "typescript", "source": [ - "const {\n getImageData, \n displayArrayAsImage, \n arrayToImageData,\n transform,\n downloadFile,\n} = require('../helpers');\n\nconst { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\n\nconst { addon: ov } = require('openvino-node'); \n" + "const { display } = require('node-kernel');\nconst { addon: ov } = require('openvino-node');\n\nconst Image = require('../image.js');\nconst {\n transform,\n} = require('../helpers');\n" ], "outputs": [] }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'road-segmentation-adas-0001';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/road-segmentation-adas-0001/FP32/';\n\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/road-segmentation-adas-0001.bin'", - "" - ] - } - ] - } - ] - }, - { - "language": "markdown", - "source": [ - "## Download an Image" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const 
baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/empty_road_mapillary.jpg';\n\nawait downloadFile(imgUrl, 'empty_road_mapillary.jpg', baseImagesDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/images/empty_road_mapillary.jpg'", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -83,7 +31,7 @@ { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n" + "const modelXMLPath = '../../assets/models/road-segmentation-adas-0001.xml';\n\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO');\n\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n" ], "outputs": [] }, @@ -97,14 +45,14 @@ { "language": "typescript", "source": [ - "const imgData = await getImageData('../../assets/images/empty_road_mapillary.jpg');\n\nconst originalImage = cv.matFromImageData(imgData);\nconst { cols: originalWidth, rows: originalHeight } = originalImage;\n\nconst image = new cv.Mat();\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\ncv.cvtColor(image, image, cv.COLOR_BGR2RGB); \n\nconst [B, C, H, W] = inputLayer.shape;\n\ncv.resize(image, image, new cv.Size(W, H));\n\nconst inputImage = transform(image.data, { width: W, height: H }, [0, 1, 2]); // NHWC to NCHW\n\ndisplayArrayAsImage(originalImage.data, originalWidth, originalHeight, display);\n" + "const img = await Image.load('../../assets/images/empty_road_mapillary.jpg');\n\nimg.display(display);\n" ], "outputs": [ { "items": [ { "mime": 
"image/jpeg", - "value": "" + "value": "" } ] } @@ -120,18 +68,9 @@ { "language": "typescript", "source": [ - "const tensor_data = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensor_data);\n\nconst inferRequest = compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\ninferRequest.infer();\n\nconst output = inferRequest.getTensor(outputLayer);\n\nconst { data: outputData } = output;\nconst layers = { bg: [], c: [], h: [], w: [] };\nconst resultLayer = [];\nconst colormap = [[68, 1, 84, 255], [48, 103, 141, 255], [53, 183, 120, 255], [199, 216, 52, 255]];\n\nconst size = outputData.length/4;\n\nfor (let i = 0; i < size; i++) {\n const valueAt = (i, number) => outputData[i + number*size];\n\n const currentValues = { \n bg: valueAt(i, 0),\n c: valueAt(i, 1),\n h: valueAt(i, 2),\n w: valueAt(i, 3),\n };\n const values = Object.values(currentValues);\n const maxIndex = values.indexOf(Math.max(...values));\n\n resultLayer.push(maxIndex);\n}\n\nconst pixels = [];\nresultLayer.forEach(i => pixels.push(...colormap[i]));\n\ndisplayArrayAsImage(pixels, W, H, display);\n" + "const [height, width] = inputLayer.shape.slice(2);\nconst resizedImg = img.resize(width, height);\n\n// Transform image data from NHWC to NCHW to match model input\n// as alternative you can use ov.preprocess.PrePostProcessor\n// see hello_reshape_ssd.js sample\nconst transformedImgData = transform(resizedImg.rgb, { width, height }, [0, 1, 2]);\nconst tensor = new ov.Tensor(\n ov.element.f32,\n inputLayer.shape,\n new Float32Array(transformedImgData),\n);\n\nconst inferRequest = compiledModel.createInferRequest();\nconst outputs = await inferRequest.inferAsync([tensor]);\nconst output = outputs[outputLayer];\nconst outputData = output.data;\n" ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] + "outputs": [] }, { "language": "markdown", @@ -143,14 +82,14 @@ { "language": "typescript", "source": [ 
- "const alpha = 0.3;\n\nconst pixelsAsImageData = arrayToImageData(pixels, W, H);\nconst mask = cv.matFromImageData(pixelsAsImageData);\n\ncv.resize(mask, mask, new cv.Size(originalWidth, originalHeight));\n\ncv.addWeighted(mask, alpha, originalImage, 1 - alpha, 0, mask);\n\ndisplayArrayAsImage(mask.data, originalWidth, originalHeight, display);\n" + "const layers = { bg: [], c: [], h: [], w: [] };\nconst resultLayer = [];\nconst colormap = [\n [68, 1, 84, 255],\n [48, 103, 141, 255],\n [53, 183, 120, 255],\n [199, 216, 52, 255],\n];\nconst size = outputData.length/4;\n\nfor (let i = 0; i < size; i++) {\n const valueAt = (i, number) => outputData[i + number*size];\n\n const currentValues = {\n bg: valueAt(i, 0),\n c: valueAt(i, 1),\n h: valueAt(i, 2),\n w: valueAt(i, 3),\n };\n const values = Object.values(currentValues);\n const maxIndex = values.indexOf(Math.max(...values));\n\n resultLayer.push(maxIndex);\n}\n\nconst pixels = [];\nresultLayer.forEach(i => pixels.push(...colormap[i]));\n\nconst alpha = 0.6;\nconst [N, C, H, W] = output.getShape();\n\nconst segmentsImg = Image.fromArray(pixels, W, H);\nconst resizedSegments = segmentsImg.resize(img.width, img.height);\nconst mergedImg = Image.overlay(img, resizedSegments, alpha);\n\nmergedImg.display(display);\n" ], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } diff --git a/samples/js/node/notebooks/hello-world.nnb b/samples/js/node/notebooks/hello-world.nnb index dbc46d196345ab..83d4ca8bec29f5 100644 --- a/samples/js/node/notebooks/hello-world.nnb +++ b/samples/js/node/notebooks/hello-world.nnb @@ -17,62 +17,10 @@ { "language": "javascript", "source": [ - "const { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\nconst { getImageData, displayImage, downloadFile } = require('../helpers.js');\n\nconst { addon: ov } = require('openvino-node');\n" + "const { display } = require('node-kernel');\nconst { addon: ov } = require('openvino-node');\n\nconst 
Image = require('../image.js');\nconst imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json');\n" ], "outputs": [] }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "javascript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'v3-small_224_1.0_float';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/models/mobelinet-v3-tf/FP32/';\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/v3-small_224_1.0_float.bin'", - "" - ] - } - ] - } - ] - }, - { - "language": "markdown", - "source": [ - "## Download an Image and Imagenet Classes" - ], - "outputs": [] - }, - { - "language": "javascript", - "source": [ - "const imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg';\nconst classesUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/datasets/imagenet/imagenet_class_index.json';\n\nawait downloadFile(imgUrl, 'coco.jpg', '../../assets/images');\nawait downloadFile(classesUrl, 'imagenet_class_index.json', '../../assets/datasets');\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/datasets/imagenet_class_index.json'", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -83,7 +31,7 @@ { "language": "javascript", "source": [ - 
"const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst outputLayer = compiledModel.outputs[0];\n" + "const modelXMLPath = '../../assets/models/v3-small_224_1.0_float.xml';\n\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputLayer = compiledModel.inputs[0];\nconst outputLayer = compiledModel.outputs[0];\n" ], "outputs": [] }, @@ -97,14 +45,14 @@ { "language": "javascript", "source": [ - "const imgData = await getImageData('../../assets/images/coco.jpg');\n\n// Use opencv-wasm to preprocess image.\nconst originalImage = cv.matFromImageData(imgData);\nconst image = new cv.Mat();\n// The MobileNet model expects images in RGB format.\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\n// Resize to MobileNet image shape.\ncv.resize(image, image, new cv.Size(224, 224));\n\ndisplayImage(imgData, display);\n" + "const img = await Image.load('../../assets/images/coco.jpg');\n\nimg.display(display);\n" ], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } @@ -120,7 +68,7 @@ { "language": "javascript", "source": [ - "const tensorData = new Float32Array(image.data);\nconst tensor = new ov.Tensor(ov.element.f32, Int32Array.from([1, 224, 224, 3]), tensorData);\n" + "const [N, inputHeight, inputWidth] = inputLayer.shape;\nconst resizedImg = img.resize(inputWidth, inputHeight);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, new Float32Array(resizedImg.rgb));\n" ], "outputs": [] }, @@ -134,14 +82,21 @@ { "language": "javascript", "source": [ - "const inferRequest = compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\ninferRequest.infer();\n\nconst resultInfer = inferRequest.getTensor(outputLayer);\nconst resultIndex = resultInfer.data.indexOf(Math.max(...resultInfer.data));\n" + "const inferRequest = 
compiledModel.createInferRequest();\nconst result = await inferRequest.inferAsync([tensor]);\n\nconst outputData = result[outputLayer].data;\nconst resultIndex = outputData.indexOf(Math.max(...outputData));\nconst prediction = outputData[resultIndex];\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "## Display result based on maximum probability" ], "outputs": [] }, { "language": "javascript", "source": [ - "const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json');\nconst imagenetClasses = ['background', ...Object.values(imagenetClassesMap)];\n\nconsole.log(`Result: ${imagenetClasses[resultIndex][1]}`);\n" + "const imagenetClassesMap = require('../../assets/datasets/imagenet_class_index.json');\nconst imagenetClasses = ['background', ...Object.values(imagenetClassesMap)];\n\nconsole.log('Result:\\n\\nid\\tprobability\\tlabel');\nconsole.log('---------------------------------');\nconsole.log(`${resultIndex}\\t${prediction.toFixed(7)}\\t${imagenetClasses[resultIndex][1]}`);\n" ], "outputs": [ { @@ -149,7 +104,11 @@ { "mime": "application/vnd.code.notebook.stdout", "value": [ - "Result: flat-coated_retriever", + "Result:", + "", + "id\tprobability\tlabel", + "---------------------------------", + "206\t0.7470666\tflat-coated_retriever", "" ] } diff --git a/samples/js/node/notebooks/optical-character-recognition.nnb b/samples/js/node/notebooks/optical-character-recognition.nnb index b7e8e109ff857f..ce450dbce0a0d7 100644 --- a/samples/js/node/notebooks/optical-character-recognition.nnb +++ b/samples/js/node/notebooks/optical-character-recognition.nnb @@ -24,60 +24,42 @@ { "language": "typescript", "source": [ - "const fs = require(\"node:fs\");\nconst path = require(\"node:path\");\nconst { createCanvas, Image, ImageData } = require(\"canvas\");\nconst { addon: ov } = require(\"openvino-node\");\nconst { display } = require(\"node-kernel\");\nconst { cv } = require(\"opencv-wasm\");\nconst {\n transform,\n 
getImageData,\n displayArrayAsImage,\n downloadFile,\n arrayToImageData,\n getImageBuffer,\n argMax,\n setShape,\n} = require(\"../helpers.js\");\n" + "const { addon: ov } = require('openvino-node');\nconst { display } = require('node-kernel');\n\nconst Image = require('../image');\nconst { transform, argMax, setShape } = require('../helpers.js');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "# Download Models" + "# Load a Detection Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Intializing Images, Models\nconst baseArtifactsDir = '../../assets/models';\nconst detBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/';\nconst recBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/text-recognition-resnet-fc/';\nconst detectionModelName = 'horizontal-text-detection-0001';\nconst textRecModelName = 'text-recognition-resnet-fc';\n\nconst detModelXMLName = `${detectionModelName}.xml`;\nconst detModelBINName = `${detectionModelName}.bin`;\n\nconst detModelXMLPath = `${baseArtifactsDir}/${detModelXMLName}`;\nconst detModelBINPath = `${baseArtifactsDir}/${detModelBINName}`;\n\nconst recModelXMLName = `${textRecModelName}.xml`;\nconst recModelBINName = `${textRecModelName}.bin`;\n\nconst recModelXMLPath = `${baseArtifactsDir}/${textRecModelName}.xml`;\nconst recModelBINPath = `${baseArtifactsDir}/${textRecModelName}.bin`;\n\nawait downloadFile(\n detBaseURL + detModelXMLName,\n detModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n detBaseURL + detModelBINName,\n detModelBINName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelXMLName,\n recModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelBINName,\n recModelBINName,\n baseArtifactsDir\n);\n" + "const detModelXMLPath = '../../assets/models/horizontal-text-detection-0001.xml';\n\n// Initialize OpenVINO core and 
load the detection model\nconst core = new ov.Core();\nconst detModel = await core.readModel(detModelXMLPath);\nconst detCompiledModel = await core.compileModel(detModel, 'AUTO');\nconst detInputLayer = detCompiledModel.input(0);\nconst detOutputLayer = detCompiledModel.output('boxes');\n" ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.xml'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.bin'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.xml'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.bin'", - "" - ] - } - ] - } - ] + "outputs": [] }, { "language": "markdown", "source": [ - "# Dowload Image" + "# Prepare Image for Inference" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\nconst imgName = 'intel_rnb.jpg';\nawait downloadFile(imgUrl, imgName, baseImagesDir);\n" + "const imagePath = '../../assets/images/intel_rnb.jpg';\nconst img = await Image.load(imagePath);\nimg.display(display);\n\n// Resize the image to meet network input size\nconst [detInputHeight, detInputWidth] = detInputLayer.shape.slice(2);\nconst resizedImg = img.resize(detInputWidth, detInputHeight);\n\n// Prepare input tensor\nconst inputImageTransformedData = transform(\n resizedImg.rgb,\n { width: detInputWidth, height: detInputHeight },\n [0, 1, 2],\n);\nconst tensorData = new Float32Array(inputImageTransformedData);\nconst tensor = new ov.Tensor(ov.element.f32, detInputLayer.shape, tensorData);\n" ], "outputs": [ { 
"items": [ { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/images/intel_rnb.jpg'", - "" - ] + "mime": "image/jpeg", + "value": "" } ] } @@ -86,141 +68,72 @@ { "language": "markdown", "source": [ - "# Load a Detection Model" + "## Run inference on the detection model" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Initialize OpenVINO core and load the detection model\nconst core = new ov.Core();\nconst detModel = await core.readModel(detModelXMLPath);\nconst detCompiledModel = await core.compileModel(detModel, 'AUTO');\nconst detInputLayer = detCompiledModel.input(0);\nconst detOutputLayer = detCompiledModel.output('boxes');\n" + "const detInferRequest = detCompiledModel.createInferRequest();\nconst detResult = await detInferRequest.inferAsync([tensor]);\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "# Prepare Image for Inference" + "# Load Text Recognition Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "const imageData = await getImageData(`${baseImagesDir}/intel_rnb.jpg`);\nconst inputImageMat = cv.matFromImageData(imageData);\nconst displayImageMat = inputImageMat.clone();\n\n// Resize the image to meet network input size\nconst [B, C, H, W] = detInputLayer.shape;\nconst resizedImage = new cv.Mat();\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB);\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB);\ncv.resize(inputImageMat, resizedImage, new cv.Size(W, H));\n\n// Prepare input tensor\nconst inputImage = transform(resizedImage.data,\n { width: W, height: H },\n [0, 1, 2]);\nconst tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(\n ov.element.f32,\n detInputLayer.shape,\n tensorData\n);\n" + "const recModelXMLPath = '../../assets/models/text-recognition-resnet-fc.xml';\n\n// Load the recognition model and prepare the inference request\nconst recModel = await 
core.readModel(recModelXMLPath);\nconst recModelCompiled = await core.compileModel(recModel, 'AUTO');\nconst recInferRequest = recModelCompiled.createInferRequest();\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Define Post-Processing Functions" + "# Define Post-Processing Functions" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n console.log(`Output shape: ${output.getData()}`);\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n" + "async function performTextRecognition(model, inferenceRequest, img) {\n const inputLayerShape = model.input(0).shape;\n const outputLayer = model.output(0);\n\n const [,, inputHeight, inputWidth] = inputLayerShape;\n const resizedImg = img.resize(inputWidth, inputHeight);\n\n // Convert image to grayscale and create tensor\n const tensor = new ov.Tensor(\n ov.element.f32,\n inputLayerShape,\n new Float32Array(resizedImg.grayscale),\n );\n\n const result = await inferenceRequest.inferAsync([tensor]);\n const recognitionResults = extractRecognitionResults(result[outputLayer]);\n const annotation = parseAnnotations(recognitionResults);\n\n return annotation;\n}\n\n// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => {\n const position = idx % 2\n ? 
Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return Math.floor(position);\n }\n\n return box.map(scaleShape);\n}\n\n// Function to extract recognition results from the model output\nfunction extractRecognitionResults(output) {\n const outputData = output.getData();\n const outputShape = output.getShape();\n const [, height, width] = outputShape;\n\n return setShape(outputData, [height, width]);\n}\n\n// Function to parse annotations from the recognition results\nfunction parseAnnotations(recognitionResults) {\n const letters = '~0123456789abcdefghijklmnopqrstuvwxyz';\n const annotation = [];\n\n for (const row of recognitionResults) {\n const letterIndex = argMax(row);\n const parsedLetter = letters[letterIndex];\n\n // Stop if end character is encountered\n if (parsedLetter === letters[0]) break;\n\n annotation.push(parsedLetter);\n }\n\n return annotation.join('');\n}\n\n// Takes original image and bounding boxes with annotations\n// and returns the image with annotations\nasync function putAnnotationsOnImage(img, boxesWithAnnotations, options) {\n const defaultOptions = { threshold: 0.3, confLabels: true };\n const { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n let finalImage = img;\n\n for (const item of boxesWithAnnotations) {\n const { box, annotation } = item;\n const conf = box[box.length - 1];\n\n if (conf < threshold) continue;\n\n const [xMin, yMin, xMax, yMax] = box;\n const yOffset = 10;\n\n finalImage = finalImage.drawRect(\n xMin, yMin,\n xMax - xMin, yMax - yMin,\n { color: 'green', width: 3 },\n );\n finalImage = finalImage.drawText(\n annotation,\n xMin, yMin - yOffset,\n { font: '30px Arial' },\n );\n\n if (!confLabels) continue;\n\n finalImage = finalImage.drawText(\n conf.toFixed(2),\n xMin, yMax + 2 * yOffset,\n { font: '20px Arial' },\n );\n }\n\n return finalImage;\n}\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "# Do Inference" + "### Do Inference and Show Detected Text Boxes and OCR 
Results for the Image\n" ], "outputs": [] }, { "language": "typescript", "source": [ - "// Create infer request\nconst detInferRequest = detCompiledModel.createInferRequest();\n\nconst detResult = await detInferRequest.inferAsync([tensor]);\nconst boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]);\n\n// Show original image\ndisplayArrayAsImage(\n displayImageMat.data,\n displayImageMat.cols,\n displayImageMat.rows,\n display\n);\n" + "// Calculate ratios\nconst [ratioX, ratioY] = [img.width / detInputWidth, img.height / detInputHeight];\nconst boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]);\n// Resize bounding boxes to the original image size\nconst boundingBoxesOriginalSizeArray = boundingBoxesArray.map(box =>\n [...multiplyByRatio(ratioX, ratioY, box), box[4]]);\n\n// Process each bounding box and run inference on the recognition model\nconst boxesWithAnnotations = [];\nfor (let i = 0; i < boundingBoxesOriginalSizeArray.length; i++) {\n const box = boundingBoxesOriginalSizeArray[i];\n const [xMin, yMin, xMax, yMax] = box;\n const croppedImg = img.crop(xMin, yMin, xMax - xMin, yMax - yMin);\n croppedImg.display(display);\n\n const annotation = await performTextRecognition(recModel, recInferRequest, croppedImg);\n\n boxesWithAnnotations.push({ box, annotation });\n\n console.log(`Box ${i}: [${box.join(',')}], Annotation: '${annotation}'`);\n}\n" ], "outputs": [ { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] - } - ] - }, - { - "language": "markdown", - "source": [ - "# Load Text Recognition Model" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "// Loading the text recognition model\nconst recModel = await core.readModel(recModelXMLPath);\nconst recModelCompiled = await core.compileModel(recModel, 'AUTO');\nconst recInputLayer = recModelCompiled.input(0);\nconst recOutputLayer = recModelCompiled.output(0);\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ 
- "# Define Post-Processing Functions" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "// Function to calculate the ratios for the image\nfunction calculateRatios(originalImage, resizedImage) {\n const realY = originalImage.rows;\n const realX = originalImage.cols;\n const resizedY = resizedImage.rows;\n const resizedX = resizedImage.cols;\n const ratioX = realX / resizedX;\n const ratioY = realY / resizedY;\n\n return { ratioX, ratioY };\n}\n\n// Function to convert the image to grayscale\nfunction convertToGrayscale(originalImage) {\n const grayscaleImage = new cv.Mat();\n cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY);\n\n return grayscaleImage;\n}\n\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => idx % 2\n ? Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return box.map(scaleShape);\n}\n\n\n// Function to resize and convert a crop to the recognition model input format\nfunction resizeAndConvertCropToModelInput(crop, netShape) {\n const [netWidth, netHeight] = netShape;\n\n // Resize the crop to the network's input shape\n const tempImg = new cv.Mat();\n cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight));\n\n // Create the reshaped buffer\n const reshapedBuffer = new Uint8Array(netHeight * netWidth);\n let index = 0;\n\n for (let i = 0; i < netHeight; i++) {\n for (let j = 0; j < netWidth; j++) {\n reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0];\n }\n }\n\n // Clean up\n tempImg.delete();\n\n return reshapedBuffer;\n}\n\n// Function to extract recognition results from the model output\nfunction extractRecognitionResults(output) {\n const outputData = output.getData();\n const outputShape = output.getShape();\n const [batchSize, height, width] = outputShape;\n\n return setShape(outputData, [height, width]);\n}\n\n// Function to parse annotations from the recognition results\nfunction 
parseAnnotations(recognitionResults) {\n const letters = \"~0123456789abcdefghijklmnopqrstuvwxyz\";\n const annotation = [];\n\n for (const row of recognitionResults) {\n const letterIndex = argMax(row);\n const parsedLetter = letters[letterIndex];\n\n // Stop if end character is encountered\n if (parsedLetter === letters[0]) break;\n annotation.push(parsedLetter);\n }\n\n return annotation.join('');\n}\n\n// Function to crop the image based on the bounding box coordinates\nfunction cropImage(originalImage, xMin, yMin, xMax, yMax) {\n xMin = Math.max(0, xMin);\n yMin = Math.max(0, yMin);\n xMax = Math.min(originalImage.cols, xMax);\n yMax = Math.min(originalImage.rows, yMax);\n if (xMin >= xMax || yMin >= yMax) {\n throw new Error('Invalid crop coordinates');\n }\n const roi = originalImage.roi(\n new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin)\n );\n const cropped = new cv.Mat();\n roi.copyTo(cropped);\n roi.delete();\n\n return cropped;\n}\n\n// Function to log the bounding boxes with annotations\nfunction printSortedAnnotations(boxesWithAnnotations) {\n /* Sort the boxes with annotations based\n on their position in the input image */\n const sortedAnnotations = boxesWithAnnotations\n .sort((a, b) => {\n const [aXMin, aYMin] = a.box;\n const [bXMin, bYMin] = b.box;\n\n return (aYMin - bYMin) || (aXMin - bXMin);\n })\n .map(item => item.annotation);\n\n console.log('Sorted Annotations:', sortedAnnotations);\n}\n\n// Get Text size\nfunction getTextSize(text, fontFace, fontScale) {\n const canvas = createCanvas(200, 200);\n const ctx = canvas.getContext('2d');\n const adjustedFontScale = fontScale * 35;\n ctx.font = `${adjustedFontScale}px ${fontFace}`;\n const metrics = ctx.measureText(text);\n const width = metrics.width;\n const height =\n metrics.actualBoundingBoxAscent +\n metrics.actualBoundingBoxDescent;\n\n return { width, height };\n}\n\n/* The convertResultToImage function visualizes object detection\n results on an image by drawing bounding boxes 
around detected\n objects and optionally adding labels to them. */\nfunction convertResultToImage(\n bgrImage,\n resizedImage,\n boxesWithAnnotations,\n options,\n) {\n const defaultOptions = { threshold: 0.3, confLabels: true };\n const { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n const colors = {\n red: [255, 0, 0, 255],\n green: [0, 255, 0, 255],\n white: [255, 255, 255, 255]\n };\n const [realY, realX] = [bgrImage.rows, bgrImage.cols];\n const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols];\n const [ratioX, ratioY] = [realX / resizedX, realY / resizedY];\n\n const rgbImage = new cv.Mat();\n cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB);\n\n boxesWithAnnotations.forEach(({ box, annotation }) => {\n const conf = box[box.length - 1];\n\n if (conf < threshold) return;\n\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box);\n\n cv.rectangle(\n rgbImage,\n new cv.Point(xMin, yMin),\n new cv.Point(xMax, yMax),\n colors.green,\n 3\n );\n\n if (!confLabels) return;\n\n const text = `${annotation}`;\n const fontScale = 0.8;\n const thickness = 1;\n const { width: textW, height: textH } = getTextSize(text, 'Arial', fontScale);\n const imageCopy = rgbImage.clone();\n\n cv.rectangle(\n imageCopy,\n new cv.Point(xMin, yMin - textH - 10),\n new cv.Point(xMin + textW, yMin - 10),\n colors.white,\n cv.FILLED\n );\n cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, rgbImage);\n cv.putText(\n rgbImage,\n text,\n new cv.Point(xMin, yMin - 10),\n cv.FONT_HERSHEY_SIMPLEX,\n fontScale,\n colors.red,\n thickness,\n cv.LINE_AA\n );\n\n imageCopy.delete();\n\n });\n\n return rgbImage;\n}\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "# Async Inference Helper Function" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "async function inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations,\n) {\n // Create infer request\n const inferRequest = 
recModelCompiled.createInferRequest();\n\n // Define the completion callback function\n function completionCallback(outputTensor, i, annotations) {\n const recognitionResults = extractRecognitionResults(outputTensor);\n const annotation = parseAnnotations(recognitionResults);\n annotations.push(annotation);\n }\n\n // Start inference in asynchronous mode\n try {\n const result = await inferRequest.inferAsync([tensor]);\n completionCallback(result[recOutputLayer], i, annotations);\n }catch (error) {\n console.error('Error during inference:', error);\n }\n}\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "### Do Inference and Show Detected Text Boxes and OCR Results for the Image\n" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "// Process each bounding box and run inference on the recognition model\nconst [batchSize, channels, height, width] = recInputLayer.shape;\n// Calculate ratios\nconst {\n ratioX,\n ratioY,\n} = calculateRatios(inputImageMat, resizedImage);\n\n// Convert image to grayscale\nconst grayscaleImage = convertToGrayscale(inputImageMat);\n\nconst annotations = [];\nconst croppedImages = [];\n\n\nfor (let i = 0; i < boundingBoxesArray.length; i++) {\n const crop = boundingBoxesArray[i];\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map(Math.floor);\n const cropRect = new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin);\n const croppedImage = grayscaleImage.roi(cropRect);\n\n try {\n const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [width, height]);\n const tensorData = new Float32Array(preprocessedCrop);\n const tensor = new ov.Tensor(\n ov.element.f32,\n Int32Array.from(recInputLayer.shape),\n tensorData\n );\n\n await inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations\n );\n\n croppedImages.push(\n cropImage(inputImageMat, xMin, yMin, xMax, yMax)\n );\n } catch (error) {\n console.error('Error during 
preprocessing:', error);\n }\n\n croppedImage.delete();\n}\n\ngrayscaleImage.delete();\n\nconst boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({\n box,\n annotation: annotations[index]\n}));\n\nconst resultImage = convertResultToImage(\n inputImageMat,\n resizedImage,\n boxesWithAnnotations,\n { threshold: 0.3, confLabels: true }\n);\n\ndisplayArrayAsImage(\n resultImage.data,\n resultImage.cols,\n resultImage.rows,\n display\n);\n\ncroppedImages.forEach((croppedImage) => {\n displayArrayAsImage(\n croppedImage.data,\n croppedImage.cols,\n croppedImage.rows,\n display\n );\n});\n" - ], - "outputs": [ + }, { "items": [ { "mime": "application/vnd.code.notebook.stdout", "value": [ - "Annotation for box 0: building", - "Cropped Image Size: 159 x 40", - "Annotation for box 1: noyce", - "Original Image Size: 690 x 517", - "Cropping Coordinates: (256, 50) to (377, 88)", - "Cropped Image Size: 121 x 38", - "Cropping Coordinates: (604, 205) to (653, 228)", - "Cropped Image Size: 49 x 23", - "Cropped Image Size: 26 x 32", - "Cropped Image Size: 31 x 23", - "Text: noyce, Width: 74.716796875, Height: 21", - "Text: 2200, Width: 62.2890625, Height: 19", - "Text: robert, Width: 73.14453125, Height: 20", + "Box 0: [391,59,550,99,0,0.518794059753418], Annotation: 'building'", "" ] } @@ -230,15 +143,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] }, { "items": [ { - "mime": "image/jpeg", - "value": "" + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 1: [257,50,377,88,0,0.48566171526908875], Annotation: 'noyce'", + "" + ] } ] }, @@ -246,15 +162,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] }, { "items": [ { - "mime": "image/jpeg", - "value": "" + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 2: [604,205,653,229,0,0.45074450969696045], Annotation: '2200'", + "" + ] } ] }, @@ -262,7 +181,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" + } + ] + }, + { + 
"items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 3: [22,33,185,74,0,0.3334950804710388], Annotation: 'robert'", + "" + ] } ] }, @@ -270,7 +200,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 4: [506,408,538,431,0,0.32059410214424133], Annotation: 'center'", + "" + ] } ] }, @@ -278,7 +219,18 @@ "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Box 5: [203,46,229,78,0,0.3125338554382324], Annotation: 'n'", + "" + ] } ] } @@ -287,28 +239,25 @@ { "language": "markdown", "source": [ - "### Print Annotations in Plain Text Format" + "## Display the OCR Results on the original image" ], "outputs": [] }, { "language": "typescript", "source": [ - "printSortedAnnotations(boxesWithAnnotations);\n" + "const annotatedImg = await putAnnotationsOnImage(\n img,\n boxesWithAnnotations,\n { threshold: 0.3, confLabels: false },\n);\nannotatedImg.display(display);\n" ], "outputs": [ { "items": [ { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "Sorted Annotations: [ 'robert', 'n', 'noyce', 'building', '2200', 'center' ]", - "" - ] + "mime": "image/jpeg", + "value": "" } ] } ] } ] -} \ No newline at end of file +} diff --git a/samples/js/node/notebooks/pose-estimation.nnb b/samples/js/node/notebooks/pose-estimation.nnb index 6ca358ed2b0cf4..16774b4514ab11 100644 --- a/samples/js/node/notebooks/pose-estimation.nnb +++ b/samples/js/node/notebooks/pose-estimation.nnb @@ -17,32 +17,17 @@ { "language": "javascript", "source": [ - "const path = require('node:path');\nconst { cv } = require('opencv-wasm');\nconst { display } = require('node-kernel');\nconst tf = require('@tensorflow/tfjs-node');\nconst {\n getImageData,\n displayArrayAsImage,\n arrayToImageData,\n getImageBuffer,\n transform,\n 
downloadFile,\n} = require('../helpers.js');\n\nconst { addon: ov } = require('openvino-node');\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "javascript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'human-pose-estimation-0001';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = `https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/3/${modelName}/FP16-INT8/`;\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\nconst imageFilename = path.parse(imgUrl).base;\n\nawait downloadFile(imgUrl, imageFilename, '../../assets/images');\n\nconst imagePath = `../../assets/images/${imageFilename}`\n" + "const path = require('node:path');\nconst { display } = require('node-kernel');\nconst tf = require('@tensorflow/tfjs-node');\nconst { addon: ov } = require('openvino-node');\n\nconst { transform } = require('../helpers.js');\nconst Image = require('../image');\n" ], "outputs": [ { "items": [ { - "mime": "application/vnd.code.notebook.stdout", + "mime": "application/vnd.code.notebook.stderr", "value": [ - "Proxy agent configured using: 'http://proxy-dmz.intel.com:911'", - "Proxy agent configured using: 'http://proxy-dmz.intel.com:911'", - "Proxy agent configured using: 'http://proxy-dmz.intel.com:911'", - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/images/intel_rnb.jpg'", + "2024-11-04 17:09:35.860051: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.", + "2024-11-04 17:09:35.872537: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX_VNNI FMA", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.", "" ] } @@ -60,7 +45,7 @@ { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'CPU', { PERFORMANCE_HINT: 'LATENCY' });\n\nconst inputLayer = compiledModel.inputs[0];\nconst outputLayers = compiledModel.outputs;\n\nconst [height, width] = inputLayer.shape.slice(2);\n\nconst heatmapsOutputKey = 'Mconv7_stage2_L2';\n\nconst THRESHOLD = 0.3;\nconst COLOR = [0, 255, 0, 255];" + "const modelXMLPath = '../../assets/models/human-pose-estimation-0001.xml';\n\nconst core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO', { PERFORMANCE_HINT: 'LATENCY' });\n\nconst inputLayer = compiledModel.inputs[0];\nconst outputLayers = compiledModel.outputs;\n\nconst [height, width] = inputLayer.shape.slice(2);\n\nconst heatmapsOutputKey = 'Mconv7_stage2_L2';\n\nconst THRESHOLD = 0.3;\nconst COLOR = 'rgb(0,255,0)';\n" ], "outputs": [] }, @@ -74,9 +59,18 @@ { "language": "typescript", "source": [ - "const imgData = await getImageData(imagePath);\n\nconst originalImage = cv.matFromImageData(imgData);\nconst { cols: originalWidth, rows: originalHeight } = originalImage;\n\nconst image = new cv.Mat();\ncv.cvtColor(originalImage, image, cv.COLOR_RGBA2RGB);\ncv.cvtColor(image, image, cv.COLOR_BGR2RGB);\ncv.resize(image, image, new cv.Size(width, 
height), cv.INTER_AREA);\n\n// NHWC to NCHW\nconst inputImage = transform(image.data, { width, height }, [0, 1, 2]);" + "const imagePath = '../../assets/images/intel_rnb.jpg';\nconst img = await Image.load(imagePath);\nimg.display(display);\n\n// Resize the image to meet network input size\nconst [inputHeight, inputWidth] = inputLayer.shape.slice(2);\nconst resizedImg = img.resize(inputWidth, inputHeight);\n\n// Prepare input tensor\nconst inputImageTransformedData = transform(\n resizedImg.rgb,\n { width: inputWidth, height: inputHeight },\n [0, 1, 2],\n);\nconst tensorData = new Float32Array(inputImageTransformedData);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n" ], - "outputs": [] + "outputs": [ + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + } + ] }, { "language": "markdown", @@ -88,7 +82,7 @@ { "language": "typescript", "source": [ - "\n// Utility function to get the coordinates of the maximum value in a tensor\nfunction getCoords(tensor) {\n const { values, indices } = tf.topk(tensor.flatten(), 1);\n const [maxVal] = values.dataSync();\n const [maxIndex] = indices.dataSync();\n const x = maxIndex % tensor.shape[1];\n const y = Math.floor(maxIndex / tensor.shape[1]);\n\n return { x, y, confidence: maxVal };\n}\n\n// Draw keypoints on the input image\nfunction drawKeypoints(image, keypoints) {\n keypoints.forEach(keypoint => {\n if (keypoint.confidence > THRESHOLD) {\n cv.circle(image,\n new cv.Point(keypoint.x, keypoint.y),\n 2,\n COLOR,\n 3,\n );\n }\n });\n}\n\n// Draw skeleton (lines between keypoints) on the input image\nfunction drawSkeleton(image, keypoints) {\n const skeleton = [\n [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],\n [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],\n ];\n\n skeleton.forEach(([start, end]) => {\n const startPoint = keypoints[start];\n const endPoint = keypoints[end];\n\n if (startPoint.confidence > THRESHOLD\n && 
endPoint.confidence > THRESHOLD) {\n cv.line(image,\n new cv.Point(startPoint.x, startPoint.y),\n new cv.Point(endPoint.x, endPoint.y),\n COLOR,\n 2,\n );\n }\n });\n}\n\nfunction toTFTensor(ovTensor) {\n return tf.tensor(ovTensor.data, ovTensor.getShape());\n}" + "// Utility function to get the coordinates of the maximum value in a tensor\nfunction getCoords(tensor) {\n const { values, indices } = tf.topk(tensor.flatten(), 1);\n const [maxVal] = values.dataSync();\n const [maxIndex] = indices.dataSync();\n const x = maxIndex % tensor.shape[1];\n const y = Math.floor(maxIndex / tensor.shape[1]);\n\n return { x, y, confidence: maxVal };\n}\n\n// Draw keypoints on the input image\nfunction drawKeypoints(image, keypoints) {\n let modifiedImage = image;\n\n keypoints.forEach(keypoint => {\n if (keypoint.confidence < THRESHOLD) return;\n\n modifiedImage = modifiedImage.drawCircle(\n keypoint.x,\n keypoint.y,\n 4,\n { color: COLOR, width: 3 },\n );\n });\n\n return modifiedImage;\n}\n\n// Draw skeleton (lines between keypoints) on the input image\nfunction drawSkeleton(image, keypoints) {\n const skeleton = [\n [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],\n [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],\n ];\n\n let modifiedImage = image;\n skeleton.forEach(([start, end]) => {\n const startPoint = keypoints[start];\n const endPoint = keypoints[end];\n\n if (startPoint.confidence < THRESHOLD\n || endPoint.confidence < THRESHOLD) return;\n\n modifiedImage = modifiedImage.drawLine(\n startPoint.x,\n startPoint.y,\n endPoint.x,\n endPoint.y,\n { color: COLOR, width: 3 },\n );\n });\n\n return modifiedImage;\n}\n\nfunction toTFTensor(ovTensor) {\n return tf.tensor(ovTensor.data, ovTensor.getShape());\n}" ], "outputs": [] }, @@ -102,7 +96,7 @@ { "language": "typescript", "source": [ - "const tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n\nconst 
inferRequest = compiledModel.createInferRequest();\ninferRequest.setInputTensor(tensor);\nconst outputs = inferRequest.infer();\n\nconst heatmaps = outputs[heatmapsOutputKey];" + "const inferRequest = compiledModel.createInferRequest();\nconst outputs = await inferRequest.inferAsync([tensor]);\n\nconst heatmaps = outputs[heatmapsOutputKey];" ], "outputs": [] }, @@ -116,18 +110,30 @@ { "language": "typescript", "source": [ - "try {\n const outputHeatmapShape = heatmaps.getShape();\n\n // Define constants\n const numKeypoints = 18;\n const heatmapWidth = outputHeatmapShape[3];\n const heatmapHeight = outputHeatmapShape[2];\n\n // Extract keypoints from heatmaps\n let keypoints = [];\n const heatmapsTFTensors = toTFTensor(heatmaps);\n const xCoef = originalWidth / heatmapWidth;\n const yCoef = originalHeight / heatmapHeight;\n\n for (let i = 0; i < numKeypoints; i++) {\n const heatmap = heatmapsTFTensors.slice(\n [0, i, 0, 0],\n [1, 1, heatmapHeight, heatmapWidth]).squeeze();\n\n const { x, y, confidence } = getCoords(heatmap);\n\n keypoints.push({\n x: x * xCoef,\n y: y * yCoef,\n confidence,\n });\n }\n\n drawKeypoints(originalImage, keypoints);\n drawSkeleton(originalImage, keypoints);\n\n displayArrayAsImage(originalImage.data, originalWidth, originalHeight, display);\n} catch(e) {\n console.log(e);\n}" + "try {\n const [N, C, heatmapHeight, heatmapWidth] = heatmaps.getShape();\n\n // Define constants\n const numKeypoints = 18;\n\n // Extract keypoints from heatmaps\n let keypoints = [];\n const heatmapsTFTensors = toTFTensor(heatmaps);\n const xCoef = img.width / heatmapWidth;\n const yCoef = img.height / heatmapHeight;\n\n for (let i = 0; i < numKeypoints; i++) {\n const heatmap = heatmapsTFTensors.slice(\n [0, i, 0, 0],\n [1, 1, heatmapHeight, heatmapWidth]).squeeze();\n\n const { x, y, confidence } = getCoords(heatmap);\n\n keypoints.push({\n x: x * xCoef,\n y: y * yCoef,\n confidence,\n });\n }\n\n let imgWithKeypoints = drawKeypoints(img, keypoints);\n 
imgWithKeypoints = drawSkeleton(imgWithKeypoints, keypoints);\n\n imgWithKeypoints.display(display);\n} catch(e) {\n console.log(e);\n}" ], "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stderr", + "value": [ + "(node:3778163) [DEP0051] DeprecationWarning: The `util.isNullOrUndefined` API is deprecated. Please use `arg === null || arg === undefined` instead.", + "(Use `node --trace-deprecation ...` to show where the warning was created)", + "" + ] + } + ] + }, { "items": [ { "mime": "image/jpeg", - "value": "" + "value": "" } ] } ] } ] -} \ No newline at end of file +} diff --git a/samples/js/node/notebooks/question-answering.nnb b/samples/js/node/notebooks/question-answering.nnb index 3b53ca163f0826..c1bf61063b5e15 100644 --- a/samples/js/node/notebooks/question-answering.nnb +++ b/samples/js/node/notebooks/question-answering.nnb @@ -17,62 +17,10 @@ { "language": "javascript", "source": [ - "const {\n exp,\n sum,\n tril,\n triu,\n argMax,\n reshape,\n getShape,\n downloadFile,\n extractValues,\n matrixMultiplication,\n} = require('../helpers.js');\nconst tokens = require('./tokens_bert.js');\n\nconst { addon: ov } = require('openvino-node'); \n" + "const {\n exp,\n sum,\n tril,\n triu,\n argMax,\n reshape,\n getShape,\n downloadFile,\n extractValues,\n matrixMultiplication,\n} = require('../helpers.js');\nconst Tokenizer = require('./tokenizer_bert.js');\n\nconst { addon: ov } = require('openvino-node');\n" ], "outputs": [] }, - { - "language": "markdown", - "source": [ - "## Download the Model" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const baseArtifactsDir = '../../assets/models';\n\nconst modelName = 'bert-small-uncased-whole-word-masking-squad-int8-0002';\nconst modelXMLName = `${modelName}.xml`;\nconst modelBINName = `${modelName}.bin`;\n\nconst modelXMLPath = baseArtifactsDir + '/' + modelXMLName;\n\nconst baseURL = 
'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/bert-small-uncased-whole-word-masking-squad-int8-0002/FP16-INT8/';\n\nawait downloadFile(baseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(baseURL + modelBINName, modelBINName, baseArtifactsDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/models/bert-small-uncased-whole-word-masking-squad-int8-0002.bin'", - "" - ] - } - ] - } - ] - }, - { - "language": "markdown", - "source": [ - "## Download the Vocab" - ], - "outputs": [] - }, - { - "language": "typescript", - "source": [ - "const baseImagesDir = '../../assets/text';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/bert-uncased/vocab.txt';\n\nawait downloadFile(imgUrl, 'vocab.txt', baseImagesDir);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "File successfully stored at '/home/nvishnya/Code/wasm-openvino/samples/js/assets/text/vocab.txt'", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -83,36 +31,10 @@ { "language": "typescript", "source": [ - "const core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\n\nconst _ppp = new ov.preprocess.PrePostProcessor(model);\n_ppp.input(0).tensor().setElementType(ov.element.f32);\n_ppp.input(1).tensor().setElementType(ov.element.f32);\n_ppp.input(2).tensor().setElementType(ov.element.f32);\n_ppp.input(3).tensor().setElementType(ov.element.f32);\n_ppp.build();\n\nconst compiledModel = await core.compileModel(model, 'CPU');\n\nconst inputs = compiledModel.inputs;\nconst outputs = compiledModel.outputs;\n\nconst inputSize = compiledModel.input(0).shape[1];\n" + "const modelXMLPath =\n '../../assets/models/bert-small-uncased-whole-word-masking-squad-0001.xml';\n\nconst
core = new ov.Core();\nconst model = await core.readModel(modelXMLPath);\n\nconst _ppp = new ov.preprocess.PrePostProcessor(model);\n_ppp.input(0).tensor().setElementType(ov.element.f32);\n_ppp.input(1).tensor().setElementType(ov.element.f32);\n_ppp.input(2).tensor().setElementType(ov.element.f32);\n_ppp.build();\n\nconst compiledModel = await core.compileModel(model, 'AUTO');\n\nconst { inputs, outputs } = compiledModel;\nconst inputSize = compiledModel.input(0).shape[1];\n\nconsole.log('Input size:', inputSize);\n" ], "outputs": [] }, - { - "language": "javascript", - "source": [ - "console.log('=== Model Inputs:');\ninputs.forEach(i => console.log(`${i}`));\nconsole.log('=== Model Outputs:');\noutputs.forEach(o => console.log(`${o}`));\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "=== Model Inputs:", - "input_ids", - "attention_mask", - "token_type_ids", - "position_ids", - "=== Model Outputs:", - "output_s", - "output_e", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -123,14 +45,14 @@ { "language": "javascript", "source": [ - "// The path to the vocabulary file.\nconst vocabFilePath = \"../../assets/text/vocab.txt\";\n\n// Create a dictionary with words and their indices.\nconst vocab = await tokens.loadVocabFile(vocabFilePath);\n\n// Define special tokens.\nconst clsToken = vocab[\"[CLS]\"];\nconst padToken = vocab[\"[PAD]\"];\nconst sepToken = vocab[\"[SEP]\"];\n\n// A function to load text from given urls.\nfunction loadContext(sources) {\n const input_urls = [];\n const paragraphs = [];\n \n for (source of sources) {\n paragraphs.push(source);\n\n // Produce one big context string.\n return paragraphs.join('\\n');\n }\n}\n" + "// Initialize BERT tokenizer\nconst tokenizer = await Tokenizer.load('../../assets/vocab/vocab.txt');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Preprocessing\n\nThe input size in this case is 384 tokens long. 
The main input (`input_ids`) to used BERT model consists of two parts: question tokens and context tokens separated by some special tokens. \n\nIf `question + context` are shorter than 384 tokens, padding tokens are added. If `question + context` is longer than 384 tokens, the context must be split into parts and the question with different parts of context must be fed to the network many times. \n\nUse overlapping, so neighbor parts of the context are overlapped by half size of the context part (if the context part equals 300 tokens, neighbor context parts overlap with 150 tokens). You also need to provide the following sequences of integer values: \n\n- `attention_mask` - a sequence of integer values representing the mask of valid values in the input. \n- `token_type_ids` - a sequence of integer values representing the segmentation of `input_ids` into question and context. \n- `position_ids` - a sequence of integer values from 0 to 383 representing the position index for each input token. \n\nFor more information, refer to the **Input** section of [BERT model documentation](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/bert-small-uncased-whole-word-masking-squad-int8-0002#input)." + "## Preprocessing\n\nThe input size in this case is 384 tokens long. The main input (`input_ids`) to used BERT model consists of two parts: question tokens and context tokens separated by some special tokens. \n\nIf `question + context` are shorter than 384 tokens, padding tokens are added. If `question + context` is longer than 384 tokens, the context must be split into parts and the question with different parts of context must be fed to the network many times. \n\nUse overlapping, so neighbor parts of the context are overlapped by half size of the context part (if the context part equals 300 tokens, neighbor context parts overlap with 150 tokens). 
You also need to provide the following sequences of integer values: \n\n- `attention_mask` - a sequence of integer values representing the mask of valid values in the input. \n- `token_type_ids` - a sequence of integer values representing the segmentation of `input_ids` into question and context.\n\nFor more information, refer to the **Input** section of [BERT model documentation](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/bert-small-uncased-whole-word-masking-squad-int8-0002#input)." ], "outputs": [] }, @@ -158,14 +80,14 @@ { "language": "javascript", "source": [ - "// A function to add padding.\nfunction pad({ inputIds, attentionMask, tokenTypeIds }) {\n // How many padding tokens.\n const diffInputSize = inputSize - inputIds.length;\n\n if (diffInputSize > 0) {\n // Add padding to all the inputs.\n inputIds = inputIds.concat(Array(diffInputSize).fill(padToken));\n attentionMask = attentionMask.concat(Array(diffInputSize).fill(0));\n tokenTypeIds = tokenTypeIds.concat(Array(diffInputSize).fill(0));\n }\n\n return [inputIds, attentionMask, tokenTypeIds, diffInputSize];\n}\n" + "// A function to add padding.\nfunction pad({ inputIds, attentionMask, tokenTypeIds }) {\n // How many padding tokens.\n const diffInputSize = inputSize - inputIds.length;\n\n if (diffInputSize > 0) {\n // Add padding to all the inputs.\n inputIds = inputIds.concat(Array(diffInputSize).fill(tokenizer.padToken));\n attentionMask = attentionMask.concat(Array(diffInputSize).fill(0));\n tokenTypeIds = tokenTypeIds.concat(Array(diffInputSize).fill(0));\n }\n\n return [inputIds, attentionMask, tokenTypeIds, diffInputSize];\n}\n" ], "outputs": [] }, { "language": "javascript", "source": [ - "// A generator of a sequence of inputs.\nfunction* prepareInput(questionTokens, contextTokens) {\n // A length of question in tokens.\n const questionLen = questionTokens.length;\n // The context part size.\n const contextLen = inputSize - questionLen - 3;\n\n if (contextLen < 
16)\n throw new Error('Question is too long in comparison to input size. No space for context');\n\n const inputLayerNames = inputs.map(i => i.toString());\n\n // Take parts of the context with overlapping by 0.5.\n const max = Math.max(1, contextTokens.length - contextLen);\n\n for (let start = 0; start < max; start += parseInt(contextLen / 2)) {\n // A part of the context.\n const partContextTokens = contextTokens.slice(start, start + contextLen);\n // The input: a question and the context separated by special tokens.\n let inputIds = [clsToken, ...questionTokens, sepToken, ...partContextTokens, sepToken];\n // 1 for any index if there is no padding token, 0 otherwise.\n let attentionMask = Array(inputIds.length).fill(1);\n // 0 for question tokens, 1 for context part.\n let tokenTypeIds = [...Array(questionLen + 2).fill(0), ...Array(partContextTokens.length + 1).fill(1)];\n\n let padNumber = 0;\n\n // Add padding at the end.\n [inputIds, attentionMask, tokenTypeIds, padNumber] = pad({ inputIds, attentionMask, tokenTypeIds });\n\n // Create an input to feed the model.\n const inputDict = {\n 'input_ids': new Float32Array(inputIds),\n 'attention_mask': new Float32Array(attentionMask),\n 'token_type_ids': new Float32Array(tokenTypeIds),\n };\n\n // Some models require additional position_ids.\n if (inputLayerNames.includes('position_ids')) {\n positionIds = inputIds.map((_, index) => index);\n inputDict['position_ids'] = new Float32Array(positionIds);\n }\n\n yield [inputDict, padNumber, start];\n }\n}\n" + "// A generator of a sequence of inputs.\nfunction* prepareInput(questionTokens, contextTokens) {\n // A length of question in tokens.\n const questionLen = questionTokens.length;\n // The context part size.\n const contextLen = inputSize - questionLen - 3;\n\n if (contextLen < 16)\n throw new Error('Question is too long in comparison to input size. 
No space for context');\n\n const inputLayerNames = inputs.map(i => i.toString());\n\n // Take parts of the context with overlapping by 0.5.\n const max = Math.max(1, contextTokens.length - contextLen);\n\n for (let start = 0; start < max; start += parseInt(contextLen / 2)) {\n // A part of the context.\n const partContextTokens = contextTokens.slice(start, start + contextLen);\n // The input: a question and the context separated by special tokens.\n let inputIds = [\n tokenizer.clsToken,\n ...questionTokens,\n tokenizer.sepToken,\n ...partContextTokens,\n tokenizer.sepToken,\n ];\n // 1 for any index if there is no padding token, 0 otherwise.\n let attentionMask = Array(inputIds.length).fill(1);\n // 0 for question tokens, 1 for context part.\n let tokenTypeIds = [...Array(questionLen + 2).fill(0), ...Array(partContextTokens.length + 1).fill(1)];\n\n let padNumber = 0;\n\n // Add padding at the end.\n [inputIds, attentionMask, tokenTypeIds, padNumber] = pad({ inputIds, attentionMask, tokenTypeIds });\n\n // Create an input to feed the model.\n const inputDict = {\n 'input_ids': new Float32Array(inputIds),\n 'attention_mask': new Float32Array(attentionMask),\n 'token_type_ids': new Float32Array(tokenTypeIds),\n };\n\n // Some models require additional position_ids.\n if (inputLayerNames.includes('position_ids')) {\n positionIds = inputIds.map((_, index) => index);\n inputDict['position_ids'] = new Float32Array(positionIds);\n }\n\n yield [inputDict, padNumber, start];\n }\n}\n" ], "outputs": [] }, @@ -186,7 +108,7 @@ { "language": "javascript", "source": [ - "function getBestAnswer(question, context) {\n // Convert the context string to tokens.\n const [contextTokens, contextTokensStartEnd] = tokens.textToTokens(context.toLowerCase(), vocab);\n // Convert the question string to tokens.\n const [questionTokens] = tokens.textToTokens(question.toLowerCase(), vocab);\n\n const results = [];\n // Iterate through different parts of the context.\n for ([networkInput, 
padding, startIdx] of prepareInput(questionTokens, contextTokens)) {\n // Get output layers.\n const outputStartKey = compiledModel.output('output_s');\n const outputEndKey = compiledModel.output('output_e');\n\n // OpenVINO inference.\n const inferRequest = compiledModel.createInferRequest();\n\n const transformedInput = {\n 'input_ids': new ov.Tensor(ov.element.f32, [1, 384], networkInput['input_ids']),\n 'attention_mask': new ov.Tensor(ov.element.f32, [1, 384], networkInput['attention_mask']),\n 'token_type_ids': new ov.Tensor(ov.element.f32, [1, 384], networkInput['token_type_ids']),\n 'position_ids': new ov.Tensor(ov.element.f32, [1, 384], networkInput['position_ids']),\n }\n\n inferRequest.infer(transformedInput);\n\n const resultStart = inferRequest.getTensor(outputStartKey).data;\n const resultEnd = inferRequest.getTensor(outputEndKey).data;\n\n // Postprocess the result, getting the score and context range for the answer.\n const scoreStartEnd = postprocess(resultStart,\n resultEnd,\n questionTokens,\n contextTokensStartEnd,\n padding,\n startIdx);\n results.push(scoreStartEnd);\n }\n\n // Find the highest score.\n const scores = results.map(r => r[0]);\n const maxIndex = scores.indexOf(Math.max(scores));\n\n const answer = results[maxIndex];\n // Return the part of the context, which is already an answer.\n return [context.slice(answer[1], answer[2]), answer[0]];\n}\n" + "function getBestAnswer(question, context) {\n // Convert the context string to tokens.\n const [contextTokens, contextTokensStartEnd] = tokenizer.tokenize(context.toLowerCase());\n // Convert the question string to tokens.\n const [questionTokens] = tokenizer.tokenize(question.toLowerCase());\n\n // Get output layers.\n const outputStartKey = compiledModel.output('output_s');\n const outputEndKey = compiledModel.output('output_e');\n\n const inferRequest = compiledModel.createInferRequest();\n\n const results = [];\n const preparedInput = prepareInput(questionTokens, contextTokens);\n\n 
// Iterate through different parts of the context.\n for ([networkInput, padding, startIdx] of preparedInput) {\n // OpenVINO inference\n inferRequest.infer({\n 'input_ids': new ov.Tensor(ov.element.f32, [1, inputSize], networkInput['input_ids']),\n 'attention_mask': new ov.Tensor(ov.element.f32, [1, inputSize], networkInput['attention_mask']),\n 'token_type_ids': new ov.Tensor(ov.element.f32, [1, inputSize], networkInput['token_type_ids']),\n });\n\n const resultStartData = inferRequest.getTensor(outputStartKey).data;\n const resultEndData = inferRequest.getTensor(outputEndKey).data;\n\n // Postprocess the result, getting the score and context range for the answer.\n const scoreStartEnd = postprocess(resultStartData,\n resultEndData,\n questionTokens,\n contextTokensStartEnd,\n padding,\n startIdx);\n\n results.push(scoreStartEnd);\n }\n\n // Find the highest score.\n const scores = results.map(r => r[0]);\n const maxIndex = argMax(scores);\n\n const answer = results[maxIndex];\n // Return the part of the context, which is already an answer.\n return [context.slice(answer[1], answer[2]), answer[0]];\n}\n" ], "outputs": [] }, @@ -200,23 +122,9 @@ { "language": "javascript", "source": [ - "function runQuestionAnswering(sources, exampleQuestion) {\n console.log(`Context: ${sources}`);\n const context = loadContext(sources);\n\n if (!context.length)\n return console.log('Error: Empty context or outside paragraphs');\n\n if (exampleQuestion) {\n const startTime = process.hrtime.bigint();\n const [answer, score] = getBestAnswer(exampleQuestion, context);\n const execTime = Number(process.hrtime.bigint() - startTime) / 1e9;\n\n console.log(`Question: ${exampleQuestion}`);\n console.log(`Answer: ${answer}`);\n console.log(`Score: ${score}`);\n console.log(`Time: ${execTime}s`);\n }\n}\n\nconst sources = [\"Computational complexity theory is a branch of the theory of computation in theoretical computer \" +\n \"science that focuses on classifying computational problems 
according to their inherent difficulty, \" +\n \"and relating those classes to each other. A computational problem is understood to be a task that \" +\n \"is in principle amenable to being solved by a computer, which is equivalent to stating that the \" +\n \"problem may be solved by mechanical application of mathematical steps, such as an algorithm.\"]\n\nrunQuestionAnswering(sources, 'What is the term for a task that generally lends itself to being solved by a computer?');\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "Context: Computational complexity theory is a branch of the theory of computation in theoretical computer science that focuses on classifying computational problems according to their inherent difficulty, and relating those classes to each other. A computational problem is understood to be a task that is in principle amenable to being solved by a computer, which is equivalent to stating that the problem may be solved by mechanical application of mathematical steps, such as an algorithm.", - "Score: 0.5286847737759395", - "Time: 0.045750747s", - "" - ] - } - ] - } - ] + "function runQuestionAnswering(sources, exampleQuestion) {\n const context = sources.join('\\n');\n\n if (!context.length)\n return console.log('Error: Empty context or outside paragraphs');\n\n if (exampleQuestion) {\n const startTime = process.hrtime.bigint();\n const [answer, score] = getBestAnswer(exampleQuestion, context);\n const execTime = Number(process.hrtime.bigint() - startTime) / 1e9;\n\n console.log(`Question: ${exampleQuestion}`);\n console.log(`Answer: ${answer}`);\n console.log(`Score: ${score}`);\n console.log(`Time: ${execTime}s`);\n }\n}\n\nconst context = [\n 'Computational complexity theory is a branch of the theory of computation in ' +\n 'theoretical computer science that focuses on classifying computational ' +\n 'problems according to their inherent difficulty and relating those classes ' +\n 'to 
each other. A computational problem is understood to be a task that is in ' +\n 'principle amenable to being solved by a computer, which is equivalent to ' +\n 'stating that the problem may be solved by mechanical application of ' +\n 'mathematical steps, such as an algorithm.',\n];\n\nconst question = 'What is the term for a task that generally lends itself to being solved by a computer?';\n\ntry {\n runQuestionAnswering(context, question);\n} catch (error) {\n console.error(error);\n}\n" + ], + "outputs": [] } ] -} +} \ No newline at end of file diff --git a/samples/js/node/notebooks/tokens_bert.js b/samples/js/node/notebooks/tokenizer_bert.js similarity index 79% rename from samples/js/node/notebooks/tokens_bert.js rename to samples/js/node/notebooks/tokenizer_bert.js index 10dc250abe8e51..b23bcbfc8519d2 100644 --- a/samples/js/node/notebooks/tokens_bert.js +++ b/samples/js/node/notebooks/tokenizer_bert.js @@ -1,10 +1,39 @@ const fs = require('node:fs/promises'); -exports.cleanWord = cleanWord; -exports.encodeByVoc = encodeByVoc; -exports.textToTokens = textToTokens; -exports.splitToWords = splitToWords; -exports.loadVocabFile = loadVocabFile; +class Tokenizer { + constructor(vocab, original) { + this.vocab = vocab; + this.original = original; + } + + get clsToken() { + return this.vocab["[CLS]"]; + } + + get padToken() { + return this.vocab["[PAD]"]; + } + + get sepToken() { + return this.vocab["[SEP]"]; + } + + tokenize(text) { + return textToTokens(text, this.vocab); + } + + detokenize(tokens) { + return tokens.map(t => this.original[t]).join(' '); + } + + static async load(path) { + const { vocab, original } = await loadVocabFile(path); + + return new Tokenizer(vocab, original); + } +} + +module.exports = Tokenizer; // Load vocabulary file for encoding async function loadVocabFile(vocabFileName) { @@ -17,7 +46,7 @@ async function loadVocabFile(vocabFileName) { vocab[token] = index; }); - return vocab; + return { vocab, original: lines }; } // Remove mark and 
control chars diff --git a/samples/js/node/notebooks/vision-background-removal.nnb b/samples/js/node/notebooks/vision-background-removal.nnb index 92e4da28d1b4e9..fea5ba9fc5dd68 100644 --- a/samples/js/node/notebooks/vision-background-removal.nnb +++ b/samples/js/node/notebooks/vision-background-removal.nnb @@ -10,38 +10,23 @@ { "language": "typescript", "source": [ - "const { cv } = require(\"opencv-wasm\");\nconst fs = require(\"fs\");\nconst { addon: ov } = require(\"openvino-node\");\nconst { display } = require(\"node-kernel\");\nconst {\n downloadFile,\n getImageData,\n transform,\n setShape,\n displayArrayAsImage,\n} = require(\"../helpers\");\n" + "const { addon: ov } = require('openvino-node');\nconst { display } = require('node-kernel');\n\nconst { transform } = require('../helpers');\nconst Image = require('../image');\n" ], "outputs": [] }, { "language": "markdown", "source": [ - "## Download Images and Model" + "## Load and Compile Unet Model" ], "outputs": [] }, { "language": "typescript", "source": [ - "const baseArtifactsDir = \"../../assets/models\";\nconst baseImagesDir = \"../../assets/images\";\n\nconst modelXMLName = \"unet_ir_model.xml\";\nconst modelBINName = \"unet_ir_model.bin\";\nconst modelXMLPath = `${baseArtifactsDir}/${modelXMLName}`;\nconst modelBinPath = `${baseArtifactsDir}/${modelBINName}`;\n\nconsole.log(`Model XML path: ${modelXMLPath}`);\nconsole.log(`Model BIN path: ${modelBinPath}`);\n\nconst foregroundImgUrl =\n \"https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_hollywood.jpg\";\nconst backgroundImgUrl =\n \"https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/wall.jpg\";\nconst modelBaseURL =\n \"https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/vision-background-removal/\";\n\nawait downloadFile(foregroundImgUrl, \"coco_hollywood.jpg\", baseImagesDir);\nawait downloadFile(backgroundImgUrl, \"wall.jpg\", baseImagesDir);\n\nawait 
downloadFile(modelBaseURL + modelXMLName, modelXMLName, baseArtifactsDir);\nawait downloadFile(modelBaseURL + modelBINName, modelBINName, baseArtifactsDir);\n" + "const modelXMLPath = '../../assets/models/unet_ir_model.xml';\n\nconst core = new ov.Core();\n\n// Read and compile model\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, 'AUTO');\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n\nconsole.log(`inputLayer: ${inputLayer}`);\nconsole.log(`outputLayer: ${outputLayer}`);\n" ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "Model XML path: ../assets/models/unet_ir_model.xml", - "Model BIN path: ../assets/models/unet_ir_model.bin", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample-clone/ocr-node-sample/assets/images/coco_hollywood.jpg'", - "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample-clone/ocr-node-sample/assets/images/wall.jpg'", - "" - ] - } - ] - } - ] + "outputs": [] }, { "language": "markdown", @@ -53,37 +38,10 @@ { "language": "typescript", "source": [ - "function normalizeImage(imageData, width, height) {\n // Mean and scale values\n const inputMean = [123.675, 116.28, 103.53];\n const inputScale = [58.395, 57.12, 57.375];\n\n const normalizedData = new Float32Array(imageData.length);\n const channels = 3;\n\n for (let i = 0; i < height; i++) {\n for (let j = 0; j < width; j++) {\n for (let c = 0; c < channels; c++) {\n const index = i * width * channels + j * channels + c;\n normalizedData[index] =\n (imageData[index] - inputMean[c]) / inputScale[c];\n }\n }\n }\n\n return normalizedData;\n}\n\nfunction removeBackground(mask, image) {\n // Iterate over the mask and set all background pixels to white\n for (let i = 0; i < mask.rows; i++) {\n for (let j = 0; j < mask.cols; j++) {\n if (mask.ucharPtr(i, j)[0] === 0) {\n image.ucharPtr(i, j)[0] = 
255;\n image.ucharPtr(i, j)[1] = 255;\n image.ucharPtr(i, j)[2] = 255;\n }\n }\n }\n}\n\nfunction removeForeground(mask, image) {\n // Iterate over the mask and set all foreground pixels to black\n for (let i = 0; i < mask.rows; i++) {\n for (let j = 0; j < mask.cols; j++) {\n if (mask.ucharPtr(i, j)[0] === 1) {\n image.ucharPtr(i, j)[0] = 0;\n image.ucharPtr(i, j)[1] = 0;\n image.ucharPtr(i, j)[2] = 0;\n } else {\n image.ucharPtr(i, j)[0] = image.ucharPtr(i, j)[0];\n image.ucharPtr(i, j)[1] = image.ucharPtr(i, j)[1];\n image.ucharPtr(i, j)[2] = image.ucharPtr(i, j)[2];\n }\n }\n }\n}\n\nfunction combineImages(mask, fgImage, bgImage, newImage) {\n // Iterate over the mask and combine the foreground and background images\n for (let i = 0; i < mask.rows; i++) {\n for (let j = 0; j < mask.cols; j++) {\n if (mask.ucharPtr(i, j)[0] === 1) {\n newImage.ucharPtr(i, j)[0] = fgImage.ucharPtr(i, j)[0];\n newImage.ucharPtr(i, j)[1] = fgImage.ucharPtr(i, j)[1];\n newImage.ucharPtr(i, j)[2] = fgImage.ucharPtr(i, j)[2];\n } else {\n newImage.ucharPtr(i, j)[0] = bgImage.ucharPtr(i, j)[0];\n newImage.ucharPtr(i, j)[1] = bgImage.ucharPtr(i, j)[1];\n newImage.ucharPtr(i, j)[2] = bgImage.ucharPtr(i, j)[2];\n }\n }\n }\n}\n" - ], - "outputs": [] - }, - { - "language": "markdown", - "source": [ - "## Load and Compile Unet Model" + "// Details about this normalization:\n// https://docs.openvino.ai/2024/notebooks/vision-background-removal-with-output.html#load-and-pre-process-input-image\nfunction normalizeImage(imageData, width, height) {\n // Mean and scale values\n const inputMean = [123.675, 116.28, 103.53];\n const inputScale = [58.395, 57.12, 57.375];\n\n const normalizedData = new Float32Array(imageData.length);\n const channels = 3;\n\n for (let i = 0; i < height; i++) {\n for (let j = 0; j < width; j++) {\n for (let c = 0; c < channels; c++) {\n const index = i * width * channels + j * channels + c;\n\n normalizedData[index] =\n (imageData[index] - inputMean[c]) / 
inputScale[c];\n }\n }\n }\n\n return normalizedData;\n}" ], "outputs": [] }, - { - "language": "typescript", - "source": [ - "const core = new ov.Core();\n\n// Read and compile model\nconst model = await core.readModel(modelXMLPath);\nconst compiledModel = await core.compileModel(model, \"CPU\");\nconst inputLayer = compiledModel.input(0);\nconst outputLayer = compiledModel.output(0);\n\nconsole.log(`inputLayer: ${inputLayer}`);\nconsole.log(`outputLayer: ${outputLayer}`);\n" - ], - "outputs": [ - { - "items": [ - { - "mime": "application/vnd.code.notebook.stdout", - "value": [ - "inputLayer: x", - "outputLayer: test_0", - "" - ] - } - ] - } - ] - }, { "language": "markdown", "source": [ @@ -94,7 +52,7 @@ { "language": "typescript", "source": [ - "// Get Image data from the foreground image\n\nconst fgrImageData = await getImageData(`${baseImagesDir}/coco_hollywood.jpg`);\nconst inputImageMat = cv.matFromImageData(fgrImageData);\nconst originalImageDisplayMat = inputImageMat.clone();\n\n// Convert the image shape to a shape and a data type expected by the network\nconst [B, C, H, W] = inputLayer.shape;\nconst resizedImage = new cv.Mat();\n\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB);\ncv.resize(inputImageMat, resizedImage, new cv.Size(W, H));\n\nlet inputImage = transform(\n resizedImage.data,\n { width: W, height: H },\n [0, 1, 2]\n);\n\ninputImage = normalizeImage(inputImage, W, H);\n\nconst tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData);\n\n" + "const foregroundImagePath = '../../assets/images/coco_hollywood.jpg';\n\n// Load foreground image\nconst originalImg = await Image.load(foregroundImagePath);\n\n// Resize image to a shape expected by the network\nconst [modelInputHeight, modelInputWidth] = inputLayer.shape.slice(2);\nconst resized = await originalImg.resize(modelInputWidth, modelInputHeight);\n\n// Create a tensor from the normalized input image\nconst transformed = 
transform(\n resized.rgb,\n {\n width: modelInputWidth,\n height: modelInputHeight\n },\n [0, 1, 2]\n);\nconst normalizedInputImage = normalizeImage(\n transformed,\n modelInputWidth,\n modelInputHeight,\n);\nconst tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, normalizedInputImage);\n" ], "outputs": [] }, @@ -108,26 +66,9 @@ { "language": "typescript", "source": [ - "// Do inference\nconst inferRequest = compiledModel.createInferRequest();\nconst inferResult = await inferRequest.inferAsync([tensor]);\n\nconst { data } = inferResult[outputLayer];\nconst reshapedResult = setShape(data, [512, 512]);\n\n// Create a Mat from the reshaped result\nconst reshapedMat = cv.matFromArray(512, 512, cv.CV_32F, reshapedResult.flat());\n\n// Get the height and width of the original image\nconst height = inputImageMat.rows;\nconst width = inputImageMat.cols;\n\n// Resize the inference result to the original image size\nconst resizedResult = new cv.Mat();\ncv.resize(\n reshapedMat,\n resizedResult,\n new cv.Size(width, height),\n 0,\n 0,\n cv.INTER_LINEAR\n);\n\n// Convert the resized result to uint8\nresizedResult.convertTo(resizedResult, cv.CV_8U);\n\n// Create a Mat to store the background removed result\nconst bgRemovedResult = originalImageDisplayMat.clone();\n\nremoveBackground(resizedResult, bgRemovedResult);\n\ndisplayArrayAsImage(\n originalImageDisplayMat.data,\n originalImageDisplayMat.cols,\n originalImageDisplayMat.rows,\n display\n);\ndisplayArrayAsImage(\n bgRemovedResult.data,\n bgRemovedResult.cols,\n bgRemovedResult.rows,\n display\n);\n" + "const inferRequest = compiledModel.createInferRequest();\n const inferResult = await inferRequest.inferAsync([tensor]);\n const { data: resultData } = inferResult[outputLayer];\n\n // Normalize the result data from grayscale to RGB\n const rgbData = [];\n for (let i = 0; i < resultData.length; i += 1) {\n const value = resultData[i] * 255;\n\n rgbData.push(value, value, value, 255);\n }\n\n // Create image based on 
result data\n const [outputHeight, outputWidth] = outputLayer.shape.slice(2);\n const maskImg = await Image.fromArray(rgbData, outputWidth, outputHeight);\n\n // Resize the result mask to the original image size and save it\n const { width, height } = originalImg;\n const resizedMaskImg = await maskImg.resize(originalImg.width, originalImg.height);\n resizedMaskImg.display(display);\n\n // Remove the foreground from the original image\n const removedBgImg = Image.mask(originalImg, resizedMaskImg);\n removedBgImg.display(display);\n" ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - }, - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] + "outputs": [] }, { "language": "markdown", @@ -139,26 +80,9 @@ { "language": "typescript", "source": [ - "// Get the background image data\nconst bgrImageData = await getImageData(`${baseImagesDir}/wall.jpg`);\nconst bgrImageMat = cv.matFromImageData(bgrImageData);\n\n// Resize the background image to the original image size\nconst resizedBgrImageMat = new cv.Mat();\ncv.resize(bgrImageMat, resizedBgrImageMat, new cv.Size(width, height));\n\n// Remove the foreground from the background image by\n// setting all foreground pixels to white\nremoveForeground(resizedResult, resizedBgrImageMat);\n\ndisplayArrayAsImage(\n resizedBgrImageMat.data,\n resizedBgrImageMat.cols,\n resizedBgrImageMat.rows,\n display\n);\n\n// create a new Mat to store the final image\nconst newImage = new cv.Mat(\n resizedBgrImageMat.rows,\n resizedBgrImageMat.cols,\n cv.CV_8UC3\n);\n\n// combine the foreground and background images to get the final image\ncombineImages(resizedResult, bgRemovedResult, resizedBgrImageMat, newImage);\n\ndisplayArrayAsImage(newImage.data, newImage.cols, newImage.rows, display);\n" + "const backgroundImagePath = '../../assets/images/wall.jpg';\n\n// Load the background image\nconst bgrImage = await Image.load(backgroundImagePath);\n\n// Resize the background image to the 
same size as the original image\nconst resizedBgrImage = bgrImage.resize(width, height);\n\n// Remove object from the background image\nconst removedFgImg = Image.mask(resizedBgrImage, resizedMaskImg.invert());\nremovedFgImg.display(display);\n\n// Combine the background and foreground images\nconst resultImg = Image.merge(removedBgImg, removedFgImg);\nresultImg.display(display);\n" ], - "outputs": [ - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - }, - { - "items": [ - { - "mime": "image/jpeg", - "value": "" - } - ] - } - ] + "outputs": [] } ] } \ No newline at end of file diff --git a/samples/js/node/optical_character_recognition/README.md b/samples/js/node/optical_character_recognition/README.md index 130566ca0bcd4c..c397bb3c846db3 100644 --- a/samples/js/node/optical_character_recognition/README.md +++ b/samples/js/node/optical_character_recognition/README.md @@ -1,6 +1,10 @@ # Optical Character Recognition Node.js Sample -Run: +Run sample: ```bash -node hello_reshape_ssd.js *path_to_detection_model_file* *path_to_recognition_model_file* *path_to_img* AUTO -``` \ No newline at end of file +node optical-character-recognition.js ../../assets/models/horizontal-text-detection-0001.xml ../../assets/models/text-recognition-resnet-fc.xml ../../assets/images/intel_rnb.jpg AUTO +``` +Where: +```bash +node optical-character-recognition.js *path_to_detection_model_file* *path_to_recognition_model_file* *path_to_img* *device* +``` diff --git a/samples/js/node/optical_character_recognition/optical-character-recognition.js b/samples/js/node/optical_character_recognition/optical-character-recognition.js index 5e371c1975a993..a5665afc67fa84 100644 --- a/samples/js/node/optical_character_recognition/optical-character-recognition.js +++ b/samples/js/node/optical_character_recognition/optical-character-recognition.js @@ -1,14 +1,10 @@ -const { addon: ov } = require('openvino-node'); -const fs = require('node:fs'); const path = require('node:path'); -const { 
createCanvas, ImageData } = require('canvas'); -const { cv } = require('opencv-wasm'); -const { - transform, - getImageData, - argMax, - setShape, -} = require('../helpers.js'); +const { addon: ov } = require('openvino-node'); + +const Image = require('../image.js'); +const { transform, argMax, setShape } = require('../helpers.js'); + +const OUTPUT_PATH = './output/'; if (require.main === module) { // Parsing and validation of input arguments @@ -32,110 +28,91 @@ if (require.main === module) { } async function main(detModelXMLPath, recModelXMLPath, imagePath, deviceName) { - // Initialize OpenVINO core and load the detection mode + // Initialize OpenVINO Core const core = new ov.Core(); + // Load the detection model const detModel = await core.readModel(detModelXMLPath); const detCompiledModel = await core.compileModel(detModel, deviceName); const detInputLayer = detCompiledModel.input(0); const detOutputLayer = detCompiledModel.output('boxes'); - const imageData = await getImageData(imagePath); - const inputImageMat = cv.matFromImageData(imageData); + const img = await Image.load(imagePath); // Resize the image to meet network input size - const [, , H, W] = detInputLayer.shape; - const resizedImage = new cv.Mat(); - cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB); - cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB); - cv.resize(inputImageMat, resizedImage, new cv.Size(W, H)); + const [, , detInputHeight, detInputWidth] = detInputLayer.shape; + const resizedImg = img.resize(detInputWidth, detInputHeight); // Prepare input tensor - const inputImage = transform( - resizedImage.data, - { width: W, height: H }, + const inputImageTransformedData = transform( + resizedImg.rgb, + { width: detInputWidth, height: detInputHeight }, [0, 1, 2], ); - const tensorData = new Float32Array(inputImage); + const tensorData = new Float32Array(inputImageTransformedData); const tensor = new ov.Tensor(ov.element.f32, detInputLayer.shape, tensorData); + // Run 
inference on the detection model const detInferRequest = detCompiledModel.createInferRequest(); - const detResult = await detInferRequest.inferAsync([tensor]); - const boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]); + // Load the recognition model const recModel = await core.readModel(recModelXMLPath); const recModelCompiled = await core.compileModel(recModel, deviceName); - const recInputLayer = recModelCompiled.input(0); - const recOutputLayer = recModelCompiled.output(0); + const recInferRequest = recModelCompiled.createInferRequest(); - // Process each bounding box and run inference on the recognition model - const [, , height, width] = recInputLayer.shape; // Calculate ratios - const { ratioX, ratioY } = calculateRatios(inputImageMat, resizedImage); + const [ratioX, ratioY] = + [img.width / detInputWidth, img.height / detInputHeight]; + const boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]); + // Resize bounding boxes to the original image size + const boundingBoxesOriginalSizeArray = boundingBoxesArray.map(box => + [...multiplyByRatio(ratioX, ratioY, box), box[4]]); - // Convert image to grayscale - const grayscaleImage = convertToGrayscale(inputImageMat); + // Process each bounding box and run inference on the recognition model + const boxesWithAnnotations = []; + for (let i = 0; i < boundingBoxesOriginalSizeArray.length; i++) { + const box = boundingBoxesOriginalSizeArray[i]; + const [xMin, yMin, xMax, yMax] = box; + const croppedImg = img.crop(xMin, yMin, xMax - xMin, yMax - yMin); + await croppedImg.save(OUTPUT_PATH + `cropped_image_${i}.jpg`); - const annotations = []; - const croppedImages = []; + const annotation = + await performTextRecognition(recModel, recInferRequest, croppedImg); - for (let i = 0; i < boundingBoxesArray.length; i++) { - const crop = boundingBoxesArray[i]; - const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map( - Math.floor, - ); - const cropRect = new cv.Rect(xMin, 
yMin, xMax - xMin, yMax - yMin); - const croppedImage = grayscaleImage.roi(cropRect); - - try { - const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [ - width, - height, - ]); - const tensorData = new Float32Array(preprocessedCrop); - const tensor = new ov.Tensor( - ov.element.f32, - Int32Array.from(recInputLayer.shape), - tensorData, - ); - - await inferAsyncProcess( - tensor, - recModelCompiled, - recOutputLayer, - i, - annotations, - ); - - croppedImages.push(cropImage(inputImageMat, xMin, yMin, xMax, yMax)); - } catch(error) { - console.error('Error during preprocessing:', error); - } - - croppedImage.delete(); + boxesWithAnnotations.push({ box, annotation }); + + console.log(`Box ${i}: [${box.join(',')}], Annotation: '${annotation}'`); } - grayscaleImage.delete(); + const annotatedImg = await putAnnotationsOnTheImage( + img, + boxesWithAnnotations, + { threshold: 0.3, confLabels: false }, + ); + const savePath = path.join(OUTPUT_PATH, 'output_image.jpg'); + await annotatedImg.save(savePath); + console.log(`The result was saved to ${savePath}`); +} - const boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({ - box, - annotation: annotations[index], - })); +async function performTextRecognition(model, inferenceRequest, img) { + const inputLayerShape = model.input(0).shape; + const outputLayer = model.output(0); - logBoxesWithAnnotations(boxesWithAnnotations); + const [,, inputHeight, inputWidth] = inputLayerShape; + const resizedImg = img.resize(inputWidth, inputHeight); - convertResultToImage( - inputImageMat, - resizedImage, - boxesWithAnnotations, - { threshold: 0.3, confLabels: true }, - './assets/results/output_image.jpg', + // Convert image to grayscale and create tensor + const tensor = new ov.Tensor( + ov.element.f32, + inputLayerShape, + new Float32Array(resizedImg.grayscale), ); - croppedImages.forEach((croppedImage, i) => { - const savePath = `./assets/results/cropped_image_${i}.jpg`; - saveImage(croppedImage, 
savePath); - }); + const result = await inferenceRequest.inferAsync([tensor]); + const recognitionResults = extractRecognitionResults(result[outputLayer]); + const annotation = parseAnnotations(recognitionResults); + + return annotation; } // Function to extract bounding boxes from the model output @@ -147,56 +124,17 @@ function extractBoundingBoxes(output) { return setShape(boxes, [numberOfBoxes, foldingCoefficient]); } -// Function to calculate the ratios for the image -function calculateRatios(originalImage, resizedImage) { - const realY = originalImage.rows; - const realX = originalImage.cols; - const resizedY = resizedImage.rows; - const resizedX = resizedImage.cols; - const ratioX = realX / resizedX; - const ratioY = realY / resizedY; - - return { ratioX, ratioY }; -} - -// Function to convert the image to grayscale -function convertToGrayscale(originalImage) { - const grayscaleImage = new cv.Mat(); - cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY); - - return grayscaleImage; -} - // Function to adjust bounding box coordinates by a given ratio function multiplyByRatio(ratioX, ratioY, box) { - const scaleShape = (shape, idx) => - idx % 2 ? Math.max(shape * ratioY, 10) : shape * ratioX; - - return box.map(scaleShape); -} - -// Function to resize and convert a crop to the recognition model input format -function resizeAndConvertCropToModelInput(crop, netShape) { - const [netWidth, netHeight] = netShape; + const scaleShape = (shape, idx) => { + const position = idx % 2 + ? 
Math.max(shape * ratioY, 10) + : shape * ratioX; - // Resize the crop to the network's input shape - const tempImg = new cv.Mat(); - cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight)); - - // Create the reshaped buffer - const reshapedBuffer = new Uint8Array(netHeight * netWidth); - let index = 0; - - for (let i = 0; i < netHeight; i++) { - for (let j = 0; j < netWidth; j++) { - reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0]; - } + return Math.floor(position); } - // Clean up - tempImg.delete(); - - return reshapedBuffer; + return box.map(scaleShape); } // Function to extract recognition results from the model output @@ -219,204 +157,49 @@ function parseAnnotations(recognitionResults) { // Stop if end character is encountered if (parsedLetter === letters[0]) break; + annotation.push(parsedLetter); } return annotation.join(''); } -// Function to crop the image based on the bounding box coordinates -function cropImage(originalImage, xMin, yMin, xMax, yMax) { - xMin = Math.max(0, xMin); - yMin = Math.max(0, yMin); - xMax = Math.min(originalImage.cols, xMax); - yMax = Math.min(originalImage.rows, yMax); - if (xMin >= xMax || yMin >= yMax) { - throw new Error('Invalid crop coordinates'); - } - const roi = originalImage.roi( - new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin), - ); - const cropped = new cv.Mat(); - roi.copyTo(cropped); - roi.delete(); - - return cropped; -} - -// Get Text size -function getTextSize(text, fontFace, fontScale) { - const canvas = createCanvas(200, 200); - const ctx = canvas.getContext('2d'); - const adjustedFontScale = fontScale * 35; - ctx.font = `${adjustedFontScale}px ${fontFace}`; - const metrics = ctx.measureText(text); - const width = metrics.width; - const height = - metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; - - return { width, height }; -} - -/* The convertResultToImage function visualizes object detection - results on an image by drawing bounding boxes around detected - objects and optionally 
adding labels to them. */ - -function convertResultToImage( - bgrImage, - resizedImage, - boxesWithAnnotations, - options, - savePath, -) { +// Takes original image and bounding boxes with annotations +// and returns the image with annotations +async function putAnnotationsOnTheImage(img, boxesWithAnnotations, options) { const defaultOptions = { threshold: 0.3, confLabels: true }; const { threshold, confLabels } = Object.assign(defaultOptions, options); - const colors = { - red: [255, 0, 0, 255], - green: [0, 255, 0, 255], - white: [255, 255, 255, 255], - }; - const [realY, realX] = [bgrImage.rows, bgrImage.cols]; - const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols]; - const [ratioX, ratioY] = [realX / resizedX, realY / resizedY]; - - const rgbImage = new cv.Mat(); - cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB); + let finalImage = img; - boxesWithAnnotations.forEach(({ box, annotation }) => { + for (const item of boxesWithAnnotations) { + const { box, annotation } = item; const conf = box[box.length - 1]; - if (conf < threshold) return; - - const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box); - - cv.rectangle( - rgbImage, - new cv.Point(xMin, yMin), - new cv.Point(xMax, yMax), - colors.green, - 3, - ); + if (conf < threshold) continue; - if (!confLabels) return; + const [xMin, yMin, xMax, yMax] = box; + const yOffset = 10; - const text = `${annotation}`; - const fontScale = 0.8; - const thickness = 1; - const { width: textW, height: textH } = getTextSize( - text, - 'Arial', - fontScale, + finalImage = finalImage.drawRect( + xMin, yMin, + xMax - xMin, yMax - yMin, + { color: 'green', width: 3 }, ); - const imageCopy = rgbImage.clone(); - - cv.rectangle( - imageCopy, - new cv.Point(xMin, yMin - textH - 10), - new cv.Point(xMin + textW, yMin - 10), - colors.white, - cv.FILLED, + finalImage = finalImage.drawText( + annotation, + xMin, yMin - yOffset, + { font: '30px Arial' }, ); - cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, 
rgbImage); - cv.putText( - rgbImage, - text, - new cv.Point(xMin, yMin - 10), - cv.FONT_HERSHEY_SIMPLEX, - fontScale, - colors.red, - thickness, - cv.LINE_AA, - ); - - imageCopy.delete(); - }); - - const saveDir = path.dirname(savePath); - if (!fs.existsSync(saveDir)) { - fs.mkdirSync(saveDir, { recursive: true }); - } - try { - saveImage(rgbImage, savePath); - } catch(e) { - console.log(`Error occurred while saving ----> ${e}`); - } - - return rgbImage; -} + if (!confLabels) continue; -// Infer async helper function - -async function inferAsyncProcess( - tensor, - recModelCompiled, - recOutputLayer, - i, - annotations, -) { - // Create infer request - const inferRequest = recModelCompiled.createInferRequest(); - - // Define the completion callback function - function completionCallback(outputTensor, i, annotations) { - const recognitionResults = extractRecognitionResults(outputTensor); - const annotation = parseAnnotations(recognitionResults); - annotations.push(annotation); - } - - // Start inference in asynchronous mode - try { - const result = await inferRequest.inferAsync([tensor]); - completionCallback(result[recOutputLayer], i, annotations); - } catch(error) { - console.error('Error during inference:', error); - } -} - -// Log boudning boxes with annotations -function logBoxesWithAnnotations(boxesWithAnnotations) { - boxesWithAnnotations.forEach((item, i) => { - const { box, annotation } = item; - console.log(`Box ${i}: [${box}], Annotation: ${annotation}`); - }); -} - -function saveImage(rgbImage, savePath) { - const canvas = createCanvas(rgbImage.cols, rgbImage.rows); - const ctx = canvas.getContext('2d'); - const componentsPerPixel = - rgbImage.data.length / (rgbImage.cols * rgbImage.rows); - const imgDataArr = []; - - if (componentsPerPixel === 1) { - for (const val of rgbImage.data) { - imgDataArr.push(val, val, val, 255); - } - } else if (componentsPerPixel === 3) { - for (let i = 0; i < rgbImage.data.length; i++) { - if (i % 3 === 0) 
imgDataArr.push(255); - imgDataArr.push(rgbImage.data[i]); - } - } - - const imageData = new ImageData( - new Uint8ClampedArray(imgDataArr), - rgbImage.cols, - rgbImage.rows, - ); - ctx.putImageData(imageData, 0, 0); - - const dataURL = canvas.toDataURL('image/jpeg'); - const base64Data = dataURL.replace(/^data:image\/jpeg;base64,/, ''); - const imageBuffer = Buffer.from(base64Data, 'base64'); - - const saveDir = path.dirname(savePath); - if (!fs.existsSync(saveDir)) { - fs.mkdirSync(saveDir, { recursive: true }); + finalImage = finalImage.drawText( + conf.toFixed(2), + xMin, yMax + 2 * yOffset, + { font: '20px Arial' }, + ); } - fs.writeFileSync(savePath, imageBuffer); - console.log('Image saved successfully!', savePath); + return finalImage; } diff --git a/samples/js/node/package-lock.json b/samples/js/node/package-lock.json index fe003eaac14935..96a013fb0435c7 100644 --- a/samples/js/node/package-lock.json +++ b/samples/js/node/package-lock.json @@ -7,14 +7,14 @@ "": { "name": "openvino-node-demo", "version": "1.0.0", + "hasInstallScript": true, "license": "Apache-2.0", "devDependencies": { + "@napi-rs/canvas": "^0.1.59", "@tensorflow/tfjs-node": "^4.19.0", "args": "^5.0.3", - "canvas": "^2.11.2", "eslint": "^8.39.0", "https-proxy-agent": "^7.0.2", - "opencv-wasm": "^4.3.0-10", "openvino-node": "^2024.4.0" }, "engines": { @@ -173,6 +173,180 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.59.tgz", + "integrity": "sha512-3vUtQ8DzYcz9xy86UUe8OfDiXNuuLB9zFAUs5N/I2GpkY/MWBJ2M7w5FqH380oC44IzYOWaOMLWCPfNZBsbBww==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.59", + "@napi-rs/canvas-darwin-arm64": "0.1.59", + "@napi-rs/canvas-darwin-x64": "0.1.59", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.59", + "@napi-rs/canvas-linux-arm64-gnu": 
"0.1.59", + "@napi-rs/canvas-linux-arm64-musl": "0.1.59", + "@napi-rs/canvas-linux-x64-gnu": "0.1.59", + "@napi-rs/canvas-linux-x64-musl": "0.1.59", + "@napi-rs/canvas-win32-x64-msvc": "0.1.59" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.59.tgz", + "integrity": "sha512-p4rRL9KIDz57Z+gKLpemX36DB7fVVHmY4DtesMGrnjx4gSBUM2M7LNzbzf4o3oPZGDiHMY0vnvNHR4dKfszNeg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.59.tgz", + "integrity": "sha512-+8s06WxcM9ilv9PVOl57hvasbwKWMfrrNAYknqMPCn4jpc4XDcLbrM5LTZGhhptlv9jQ9DmHfZ978/xInsMYXw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.59.tgz", + "integrity": "sha512-6kziJHjXdxduYK2L2uuwjEIYoPJednKq+C81MCm3fPobXE4HBKs0JGXwq3GkWNe340U340vmagwXiFi6muEy+g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.59.tgz", + "integrity": "sha512-eCkyS7jojNmaUPaVFdNjAyS0R3isrJtUfRf1vRP6K50GRuHso3vwQRbZBPKM71qHdjPDylfaQc5H6/M7epyD+w==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + 
"node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.59.tgz", + "integrity": "sha512-1u4++lbsolP1MAPViuDoZmgmDLKlV0iJnlHN2dfwgbu3t53P0l3jIT1oCIAiWil0OlrWtDF24JbY7LUUGH5aHg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.59.tgz", + "integrity": "sha512-eqevZ2kWPxeAnvhxl7U5tf6AiMnhlO4w2Hci79WQkfeirqQG6RRM4Jnxbh9iO3jkAnnOXmM4r+S3UrOcfIx1Rg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.59.tgz", + "integrity": "sha512-F+T63RnLt0qYUXhbOpaome3vIWLW4xoQRmhTnkKDzOtBSnKVP7sCM6E5/5tByOFCR3fTj4ksMeeHy8zJScEExA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.59.tgz", + "integrity": "sha512-HhUgpTGQUR2VRslEC5Idf6s0hhamJiVlEh2k3AG9XXOwX6fg0xXkqm84DPiOCLzsO5bqtJEo+rh03BUSDcf53g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.59", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.59.tgz", + "integrity": 
"sha512-bYMiZJsKPkU7HEoYI5E0alOSV1EkaigY4VEgGHPK9W/qGMmNFsxdbURQqa5h3zbhZTK5QRSdYYqowcTEYVIlug==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -774,21 +948,6 @@ "node": ">=6" } }, - "node_modules/canvas": { - "version": "2.11.2", - "resolved": "https://registry.npmjs.org/canvas/-/canvas-2.11.2.tgz", - "integrity": "sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==", - "dev": true, - "hasInstallScript": true, - "dependencies": { - "@mapbox/node-pre-gyp": "^1.0.0", - "nan": "^2.17.0", - "simple-get": "^3.0.3" - }, - "engines": { - "node": ">=6" - } - }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -924,18 +1083,6 @@ } } }, - "node_modules/decompress-response": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", - "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", - "dev": true, - "dependencies": { - "mimic-response": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -1745,18 +1892,6 @@ "node": ">= 0.6" } }, - "node_modules/mimic-response": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.1.0.tgz", - "integrity": "sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==", - "dev": true, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/minimatch": { "version": "3.1.2", "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -1830,12 +1965,6 @@ "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", "dev": true }, - "node_modules/nan": { - "version": "2.20.0", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.20.0.tgz", - "integrity": "sha512-bk3gXBZDGILuuo/6sKtr0DQmSThYHLtNCdSdXk9YkxD/jK6X2vmCyyXBBxyqZ4XcnzTyYEAThfX3DCEnLf6igw==", - "dev": true - }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -1908,12 +2037,6 @@ "wrappy": "1" } }, - "node_modules/opencv-wasm": { - "version": "4.3.0-10", - "resolved": "https://registry.npmjs.org/opencv-wasm/-/opencv-wasm-4.3.0-10.tgz", - "integrity": "sha512-EWmWLUzp2suoc6N44Y4ouWT85QwvShx23Q430R+lp6NyS828bjQn6mCgA3NJ6Z/S59aaTeeu+RhqPQIJIYld1w==", - "dev": true - }, "node_modules/openvino-node": { "version": "2024.4.0", "resolved": "https://registry.npmjs.org/openvino-node/-/openvino-node-2024.4.0.tgz", @@ -2272,37 +2395,6 @@ "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", "dev": true }, - "node_modules/simple-concat": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", - "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/simple-get": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.1.tgz", - "integrity": "sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==", - "dev": true, - "dependencies": { - 
"decompress-response": "^4.2.0", - "once": "^1.3.1", - "simple-concat": "^1.0.0" - } - }, "node_modules/sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", diff --git a/samples/js/node/package.json b/samples/js/node/package.json index 683312fab6742b..d392a72143de03 100644 --- a/samples/js/node/package.json +++ b/samples/js/node/package.json @@ -2,17 +2,18 @@ "name": "openvino-node-demo", "version": "1.0.0", "license": "Apache-2.0", + "type": "commonjs", "devDependencies": { + "@tensorflow/tfjs-node": "^4.19.0", "args": "^5.0.3", - "canvas": "^2.11.2", "eslint": "^8.39.0", "https-proxy-agent": "^7.0.2", - "opencv-wasm": "^4.3.0-10", - "@tensorflow/tfjs-node": "^4.19.0", - "openvino-node": "^2024.4.0" + "openvino-node": "^2024.4.0", + "@napi-rs/canvas": "^0.1.59" }, "scripts": { - "lint": "eslint ." + "lint": "eslint .", + "postinstall": "node ./fetch-samples-assets.js" }, "engines": { "node": ">=21.0.0" diff --git a/samples/js/node/vision_background_removal/README.md b/samples/js/node/vision_background_removal/README.md index cdefc0c4186cbb..a9a3e3270eae45 100644 --- a/samples/js/node/vision_background_removal/README.md +++ b/samples/js/node/vision_background_removal/README.md @@ -1,6 +1,10 @@ # Vision Background Removal Node.js Sample -Run: +Run sample: ```bash -node vision_background_removal.js *path_to_model_file* *path_to_foreground_image* *path_to_background_image* AUTO -``` \ No newline at end of file +node vision_background_removal.js ../../assets/models/unet_ir_model.xml ../../assets/images/coco_hollywood.jpg ../../assets/images/wall.jpg AUTO +``` +Where: +```bash +node vision_background_removal.js *path_to_model_file* *path_to_foreground_image* *path_to_background_image* *device* +``` diff --git a/samples/js/node/vision_background_removal/vision_background_removal.js b/samples/js/node/vision_background_removal/vision_background_removal.js index 2487371c597008..aabaad1a65b80c 100644 --- 
a/samples/js/node/vision_background_removal/vision_background_removal.js +++ b/samples/js/node/vision_background_removal/vision_background_removal.js @@ -1,12 +1,10 @@ -const { cv } = require('opencv-wasm'); -const fs = require('node:fs').promises; -const path = require('node:path'); const { addon: ov } = require('openvino-node'); -const { createCanvas, ImageData } = require('canvas'); -const { getImageData, transform, setShape } = require('../helpers'); + +const { transform } = require('../helpers'); +const Image = require('../image'); if (require.main === module) { -// Parsing and validation of input arguments + // Parsing and validation of input arguments if (process.argv.length !== 6) throw new Error( `Usage: ${process.argv[1]} ` + @@ -15,21 +13,23 @@ if (require.main === module) { ); const unetModelPath = process.argv[2]; - const foreGroundImage = process.argv[3]; - const backGroundImage = process.argv[4]; + const foregroundImagePath = process.argv[3]; + const backgroundImagePath = process.argv[4]; const deviceName = process.argv[5]; try { - main(unetModelPath, foreGroundImage, backGroundImage, deviceName); + main(unetModelPath, foregroundImagePath, backgroundImagePath, deviceName); } catch(error) { console.error('Error occurred', error); } } +module.exports = main; + async function main( unetModelPath, - foreGroundImage, - backGroundImage, + foregroundImagePath, + backgroundImagePath, deviceName, ) { const core = new ov.Core(); @@ -38,104 +38,81 @@ async function main( const model = await core.readModel(unetModelPath); const compiledModel = await core.compileModel(model, deviceName); - // Get the names of input and output layers. 
+ // Get the names of input and output layers const inputLayer = compiledModel.input(0); const outputLayer = compiledModel.output(0); - // Get Image data from the foreground image - const imageData = await getImageData(foreGroundImage); - const inputImageMat = cv.matFromImageData(imageData); - - // Convert the image shape to a shape and a data type expected by the network - const [, , H, W] = inputLayer.shape; - const resizedImage = new cv.Mat(); - cv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB); - cv.resize(inputImageMat, resizedImage, new cv.Size(W, H)); + // Load foreground image + const originalImg = await Image.load(foregroundImagePath); - const inputImage = transform( - resizedImage.data, - { width: W, height: H }, - [0, 1, 2], - ); - - // Normalize the input image Mat - const normalizedInputImage = normalizeImage(inputImage, W, H); + // Resize image to a shape expected by the network + const [, , modelInputHeight, modelInputWidth] = inputLayer.shape; + const resized = await originalImg.resize(modelInputWidth, modelInputHeight); // Create a tensor from the normalized input image - const tensorData = new Float32Array(normalizedInputImage); - const tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, tensorData); + const transformed = transform( + resized.rgb, + { + width: modelInputWidth, + height: modelInputHeight + }, + [0, 1, 2] + ); + const normalizedInputImage = normalizeImage( + transformed, + modelInputWidth, + modelInputHeight, + ); + const tensor = new ov.Tensor(ov.element.f32, inputLayer.shape, normalizedInputImage); // Do inference const inferRequest = compiledModel.createInferRequest(); const inferResult = await inferRequest.inferAsync([tensor]); + const { data: resultData } = inferResult[outputLayer]; - const { data } = inferResult[outputLayer]; - const reshapedResult = setShape(data, [512, 512]); - - // Create a Mat from the reshaped result - const reshapedMat = cv.matFromArray( - 512, - 512, - cv.CV_32F, - reshapedResult.flat(), 
- ); - - // Get the height and width of the original image - const height = inputImageMat.rows; - const width = inputImageMat.cols; - - // Resize the inference result to the original image size - const resizedResult = new cv.Mat(); - cv.resize( - reshapedMat, - resizedResult, - new cv.Size(width, height), - 0, - 0, - cv.INTER_LINEAR, - ); - - // Convert the resized result to uint8 - resizedResult.convertTo(resizedResult, cv.CV_8U); - - // Create a Mat to store the background removed result - const bgRemovedResult = inputImageMat.clone(); + // Normalize the result data from grayscale to RGB + const rgbData = []; + for (let i = 0; i < resultData.length; i += 1) { + const value = resultData[i] * 255; - removeBackground(resizedResult, bgRemovedResult); + rgbData.push(value, value, value, 255); + } - // Save the background removed result - await saveImage(bgRemovedResult, './bg_removed_result.jpg'); + // Create image based on result data + const [outputHeight, outputWidth] = outputLayer.shape.slice(2); + const maskImg = await Image.fromArray(rgbData, outputWidth, outputHeight); - // Get the background image data - const bgrImageData = await getImageData(backGroundImage); - const bgrImageMat = cv.matFromImageData(bgrImageData); + // Resize the result mask to the original image size and save it + const { width, height } = originalImg; + const resizedMaskImg = await maskImg.resize(originalImg.width, originalImg.height); + const maskImagePath = './out_mask.jpg'; + await resizedMaskImg.save(maskImagePath); + console.log(`The mask image was saved to '${maskImagePath}'`); - // Resize the background image to the original image size - const resizedBgrImageMat = new cv.Mat(); - cv.cvtColor(bgrImageMat, bgrImageMat, cv.COLOR_BGR2RGB); - cv.resize(bgrImageMat, resizedBgrImageMat, new cv.Size(width, height)); + // Remove the foreground from the original image + const removedBgImg = Image.mask(originalImg, resizedMaskImg); - // Remove the foreground from the background image by - // 
setting all foreground pixels to white - removeForeground(resizedResult, resizedBgrImageMat); + // Load the background image + const bgrImage = await Image.load(backgroundImagePath); - // Save the foreground removed from the background image - await saveImage(resizedBgrImageMat, './fg_removed_from_background.jpg'); + // Resize the background image to the same size as the original image + const resizedBgrImage = bgrImage.resize(width, height); - // create a new Mat to store the final image - const newImage = new cv.Mat( - resizedBgrImageMat.rows, - resizedBgrImageMat.cols, - cv.CV_8UC3, - ); + // Remove object from the background image + const removedFgImg = Image.mask(resizedBgrImage, resizedMaskImg.invert()); - // combine the foreground and background images to get the final image - combineImages(resizedResult, bgRemovedResult, resizedBgrImageMat, newImage); + // Combine the background and foreground images + const resultImg = Image.merge(removedBgImg, removedFgImg); // Save the final image - await saveImage(newImage, './background_changed_image.jpg'); + const outputImagePath = './out_bgr_changed_image.jpg'; + await resultImg.save(outputImagePath); + console.log(`The result image was saved to '${outputImagePath}'`); + console.log('The background was successfully changed'); } +// Details about this normalization: +// https://docs.openvino.ai/2024/notebooks/vision-background-removal-with-output.html#load-and-pre-process-input-image function normalizeImage(imageData, width, height) { // Mean and scale values const inputMean = [123.675, 116.28, 103.53]; @@ -148,6 +125,7 @@ function normalizeImage(imageData, width, height) { for (let j = 0; j < width; j++) { for (let c = 0; c < channels; c++) { const index = i * width * channels + j * channels + c; + normalizedData[index] = (imageData[index] - inputMean[c]) / inputScale[c]; } @@ -156,93 +134,3 @@ function normalizeImage(imageData, width, height) { return normalizedData; } - -function removeBackground(mask, image) { - 
// Iterate over the mask and set all background pixels to white - for (let i = 0; i < mask.rows; i++) { - for (let j = 0; j < mask.cols; j++) { - if (mask.ucharPtr(i, j)[0] === 0) { - image.ucharPtr(i, j)[0] = 255; - image.ucharPtr(i, j)[1] = 255; - image.ucharPtr(i, j)[2] = 255; - } - } - } -} - -function removeForeground(mask, image) { - // Iterate over the mask and set all foreground pixels to black - for (let i = 0; i < mask.rows; i++) { - for (let j = 0; j < mask.cols; j++) { - if (mask.ucharPtr(i, j)[0] === 1) { - image.ucharPtr(i, j)[0] = 0; - image.ucharPtr(i, j)[1] = 0; - image.ucharPtr(i, j)[2] = 0; - } else { - image.ucharPtr(i, j)[0] = image.ucharPtr(i, j)[0]; - image.ucharPtr(i, j)[1] = image.ucharPtr(i, j)[1]; - image.ucharPtr(i, j)[2] = image.ucharPtr(i, j)[2]; - } - } - } -} - -function combineImages(mask, fgImage, bgImage, newImage) { - // Iterate over the mask and combine the foreground and background images - for (let i = 0; i < mask.rows; i++) { - for (let j = 0; j < mask.cols; j++) { - if (mask.ucharPtr(i, j)[0] === 1) { - newImage.ucharPtr(i, j)[0] = fgImage.ucharPtr(i, j)[0]; - newImage.ucharPtr(i, j)[1] = fgImage.ucharPtr(i, j)[1]; - newImage.ucharPtr(i, j)[2] = fgImage.ucharPtr(i, j)[2]; - } else { - newImage.ucharPtr(i, j)[0] = bgImage.ucharPtr(i, j)[0]; - newImage.ucharPtr(i, j)[1] = bgImage.ucharPtr(i, j)[1]; - newImage.ucharPtr(i, j)[2] = bgImage.ucharPtr(i, j)[2]; - } - } - } -} - -async function saveImage(rgbImage, savePath) { - const canvas = createCanvas(rgbImage.cols, rgbImage.rows); - const ctx = canvas.getContext('2d'); - const componentsPerPixel = - rgbImage.data.length / (rgbImage.cols * rgbImage.rows); - const imgDataArr = []; - - if (componentsPerPixel === 1) { - for (const val of rgbImage.data) { - imgDataArr.push(val, val, val, 255); - } - } else if (componentsPerPixel === 3) { - for (let i = 0; i < rgbImage.data.length; i += 3) { - imgDataArr.push( - rgbImage.data[i + 2], // Red - rgbImage.data[i + 1], // Green - 
rgbImage.data[i], // Blue - 255, // Alpha - ); - } - } - - const imageData = new ImageData( - new Uint8ClampedArray(imgDataArr), - rgbImage.cols, - rgbImage.rows, - ); - ctx.putImageData(imageData, 0, 0); - - const dataURL = canvas.toDataURL('image/jpeg'); - const base64Data = dataURL.replace(/^data:image\/jpeg;base64,/, ''); - const imageBuffer = Buffer.from(base64Data, 'base64'); - - const saveDir = path.dirname(savePath); - try { - await fs.mkdir(saveDir, { recursive: true }); - await fs.writeFile(savePath, imageBuffer); - console.log('Image saved successfully!', savePath); - } catch(error) { - console.error('Error saving image:', error); - } -} diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 73cdd57e508bdb..69ad9f460e357a 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -12,6 +12,7 @@ set(shellcheck_skip_list "${OpenVINO_SOURCE_DIR}/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_cpu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_gpu/thirdparty" + "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11" "${TEMP}") diff --git a/src/bindings/c/include/openvino/c/ov_prepostprocess.h b/src/bindings/c/include/openvino/c/ov_prepostprocess.h index ea2a82943dd683..d7cbbdc26eb5cb 100644 --- a/src/bindings/c/include/openvino/c/ov_prepostprocess.h +++ b/src/bindings/c/include/openvino/c/ov_prepostprocess.h @@ -93,6 +93,18 @@ typedef enum { RESIZE_NEAREST //!< nearest algorithm } ov_preprocess_resize_algorithm_e; +/** + * @enum ov_padding_mode_e + * @ingroup ov_prepostprocess_c_api + * @brief This enum contains enumeration for padding mode. + */ +typedef enum { + CONSTANT = 0, //!< Pads with given constant value. + EDGE, //!< Pads with tensor edge values. + REFLECT, //!< Pads with reflection of tensor data along axis. Values on the edges are not duplicated. + SYMMETRIC //!< Pads similar like `REFLECT` but values on the edges are duplicated. 
+} ov_padding_mode_e; + /** * @brief Create a ov_preprocess_prepostprocessor_t instance. * @ingroup ov_prepostprocess_c_api @@ -512,3 +524,23 @@ ov_preprocess_input_model_info_set_layout(ov_preprocess_input_model_info_t* prep */ OPENVINO_C_API(ov_status_e) ov_preprocess_prepostprocessor_build(const ov_preprocess_prepostprocessor_t* preprocess, ov_model_t** model); + +/** + * @brief Add pad preprocess operation. Extends an input tensor on edges with constants. + * + * @param preprocess_input_process_steps A pointer to the ov_preprocess_preprocess_steps_t. + * @param pads_begin Number of padding elements to add at the beginning of each axis. + * @param pads_begin_size Pads begin size (number of axes). + * @param pads_end Number of padding elements to add at the end of each axis. + * @param pads_end_size Pads end size (number of axes). + * @param value Value to be populated in the padded area (mode=CONSTANT) + * @param mode Padding mode. + */ +OPENVINO_C_API(ov_status_e) +ov_preprocess_preprocess_steps_pad(const ov_preprocess_preprocess_steps_t* preprocess_input_process_steps, + const int* const pads_begin, + size_t pads_begin_size, + const int* const pads_end, + size_t pads_end_size, + float value, + ov_padding_mode_e mode); diff --git a/src/bindings/c/src/ov_prepostprocess.cpp b/src/bindings/c/src/ov_prepostprocess.cpp index 616883dd54e74c..8448e9daeb10b1 100644 --- a/src/bindings/c/src/ov_prepostprocess.cpp +++ b/src/bindings/c/src/ov_prepostprocess.cpp @@ -24,6 +24,12 @@ const std::map color_format_map {ov_color_format_e::RGBX, ov::preprocess::ColorFormat::RGBX}, {ov_color_format_e::BGRX, ov::preprocess::ColorFormat::BGRX}}; +const std::map padding_mode_map = { + {ov_padding_mode_e::CONSTANT, ov::preprocess::PaddingMode::CONSTANT}, + {ov_padding_mode_e::EDGE, ov::preprocess::PaddingMode::EDGE}, + {ov_padding_mode_e::REFLECT, ov::preprocess::PaddingMode::REFLECT}, + {ov_padding_mode_e::SYMMETRIC, ov::preprocess::PaddingMode::SYMMETRIC}}; + #define 
GET_OV_COLOR_FARMAT(a) \ (color_format_map.find(a) == color_format_map.end() ? ov::preprocess::ColorFormat::UNDEFINED \ : color_format_map.at(a)) @@ -524,3 +530,23 @@ ov_status_e ov_preprocess_prepostprocessor_build(const ov_preprocess_prepostproc return ov_status_e::OK; } + +ov_status_e ov_preprocess_preprocess_steps_pad(const ov_preprocess_preprocess_steps_t* preprocess_input_process_steps, + const int* const pads_begin, + size_t pads_begin_size, + const int* const pads_end, + size_t pads_end_size, + float value, + ov_padding_mode_e mode) { + if (!preprocess_input_process_steps) { + return ov_status_e::INVALID_C_PARAM; + } + try { + std::vector vec_begin(pads_begin, pads_begin + pads_begin_size); + std::vector vec_end(pads_end, pads_end + pads_end_size); + preprocess_input_process_steps->object->pad(vec_begin, vec_end, value, padding_mode_map.at(mode)); + } + CATCH_OV_EXCEPTIONS + + return ov_status_e::OK; +} diff --git a/src/bindings/c/tests/ov_preprocess_test.cpp b/src/bindings/c/tests/ov_preprocess_test.cpp index 94252bd3a3cbb9..aeb7a097af1352 100644 --- a/src/bindings/c/tests/ov_preprocess_test.cpp +++ b/src/bindings/c/tests/ov_preprocess_test.cpp @@ -1,6 +1,8 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include + #include "ov_test.hpp" class ov_preprocess_test : public ::testing::Test { @@ -596,3 +598,71 @@ TEST_F(ov_preprocess_test, ov_preprocess_prepostprocessor_for_nv12_input) { ov_layout_free(layout); } + +TEST_F(ov_preprocess_test, ov_preprocess_preprocess_steps_pad_constant) { + OV_EXPECT_OK(ov_preprocess_prepostprocessor_create(model, &preprocess)); + EXPECT_NE(nullptr, preprocess); + + OV_EXPECT_OK(ov_preprocess_prepostprocessor_get_input_info(preprocess, &input_info)); + EXPECT_NE(nullptr, input_info); + + OV_EXPECT_OK(ov_preprocess_input_info_get_tensor_info(input_info, &input_tensor_info)); + EXPECT_NE(nullptr, input_tensor_info); + + ov_shape_t shape; + int64_t dims[4] = {1, 1, 225, 230}; + 
OV_EXPECT_OK(ov_shape_create(4, dims, &shape)); + + OV_EXPECT_OK(ov_tensor_create(ov_element_type_e::F32, shape, &tensor)); + OV_EXPECT_OK(ov_preprocess_input_tensor_info_set_from(input_tensor_info, tensor)); + + OV_EXPECT_OK(ov_preprocess_input_info_get_preprocess_steps(input_info, &input_process)); + EXPECT_NE(nullptr, input_process); + + constexpr auto pads_begin = std::array{0, 1, 2, 0}; + constexpr auto pads_end = std::array{0, 1, 0, -3}; + + OV_EXPECT_OK(ov_preprocess_preprocess_steps_pad(input_process, + pads_begin.data(), + pads_begin.size(), + pads_end.data(), + pads_end.size(), + 1.0f, + ov_padding_mode_e::CONSTANT)); + OV_EXPECT_OK(ov_preprocess_prepostprocessor_build(preprocess, &ppp_model)); + EXPECT_NE(nullptr, ppp_model); +} + +TEST_F(ov_preprocess_test, ov_preprocess_preprocess_steps_pad_edge) { + OV_EXPECT_OK(ov_preprocess_prepostprocessor_create(model, &preprocess)); + EXPECT_NE(nullptr, preprocess); + + OV_EXPECT_OK(ov_preprocess_prepostprocessor_get_input_info(preprocess, &input_info)); + EXPECT_NE(nullptr, input_info); + + OV_EXPECT_OK(ov_preprocess_input_info_get_tensor_info(input_info, &input_tensor_info)); + EXPECT_NE(nullptr, input_tensor_info); + + ov_shape_t shape; + int64_t dims[4] = {1, 2, 225, 230}; + OV_EXPECT_OK(ov_shape_create(4, dims, &shape)); + + OV_EXPECT_OK(ov_tensor_create(ov_element_type_e::F32, shape, &tensor)); + OV_EXPECT_OK(ov_preprocess_input_tensor_info_set_from(input_tensor_info, tensor)); + + OV_EXPECT_OK(ov_preprocess_input_info_get_preprocess_steps(input_info, &input_process)); + EXPECT_NE(nullptr, input_process); + + constexpr auto pads_begin = std::array{0, 0, 2, 0}; + constexpr auto pads_end = std::array{0, 1, 0, -3}; + + OV_EXPECT_OK(ov_preprocess_preprocess_steps_pad(input_process, + pads_begin.data(), + pads_begin.size(), + pads_end.data(), + pads_end.size(), + 1.0f, + ov_padding_mode_e::EDGE)); + OV_EXPECT_OK(ov_preprocess_prepostprocessor_build(preprocess, &ppp_model)); + EXPECT_NE(nullptr, ppp_model); +} 
diff --git a/src/bindings/js/node/package.json b/src/bindings/js/node/package.json index 8bc6bbd4bb1d46..1ca1f10cdf57c2 100644 --- a/src/bindings/js/node/package.json +++ b/src/bindings/js/node/package.json @@ -3,7 +3,7 @@ "version": "2024.4.0", "description": "OpenVINO™ utils for using from Node.js environment", "repository": { - "url": "https://github.com/openvinotoolkit/openvino.git", + "url": "git+https://github.com/openvinotoolkit/openvino.git", "type": "git" }, "license": "Apache-2.0", @@ -23,7 +23,7 @@ "test:e2e": "mocha ./tests/e2e/electron-app.test.js", "tsc": "tsc", "postinstall": "npm run install_runtime", - "download_runtime": "node ./scripts/download_runtime.js", + "download_runtime": "node ./scripts/download-runtime.js", "install_runtime": "npm run download_runtime -- --ignore-if-exists" }, "devDependencies": { diff --git a/src/bindings/js/node/scripts/download-runtime.js b/src/bindings/js/node/scripts/download-runtime.js new file mode 100644 index 00000000000000..90bece67161a6a --- /dev/null +++ b/src/bindings/js/node/scripts/download-runtime.js @@ -0,0 +1,24 @@ +const { join } = require('node:path'); + +const BinaryManager = require('./lib/binary-manager'); +const packageJson = require('../package.json'); + +if (require.main === module) main(); + +async function main() { + if (!BinaryManager.isCompatible()) process.exit(1); + + const force = process.argv.includes('-f') || process.argv.includes('--force'); + const ignoreIfExists = process.argv.includes('-i') + || process.argv.includes('--ignore-if-exists'); + + const { env } = process; + const proxy = env.http_proxy || env.HTTP_PROXY || env.npm_config_proxy; + + await BinaryManager.prepareBinary( + join(__dirname, '..'), + packageJson.version, + packageJson.binary, + { force, ignoreIfExists, proxy }, + ); +} diff --git a/src/bindings/js/node/scripts/download_runtime.js b/src/bindings/js/node/scripts/download_runtime.js deleted file mode 100644 index 321eb4b125bc6c..00000000000000 --- 
a/src/bindings/js/node/scripts/download_runtime.js +++ /dev/null @@ -1,302 +0,0 @@ -const os = require('os'); -const path = require('path'); -const tar = require('tar-fs'); -const https = require('node:https'); -const gunzip = require('gunzip-maybe'); -const fs = require('node:fs/promises'); -const { createReadStream, createWriteStream } = require('node:fs'); -const { HttpsProxyAgent } = require('https-proxy-agent'); - -const packageJson = require('../package.json'); - -const codeENOENT = 'ENOENT'; - -if (require.main === module) { - main(); -} - -async function main() { - const modulePath = packageJson.binary['module_path']; - const destinationPath = path.resolve(__dirname, '..', modulePath); - const force = process.argv.includes('-f'); - const ignoreIfExists = process.argv.includes('--ignore-if-exists'); - const { env } = process; - const proxy = env.http_proxy || env.HTTP_PROXY || env.npm_config_proxy; - - try { - await downloadRuntime(destinationPath, { force, ignoreIfExists, proxy }); - } catch(error) { - if (error instanceof RuntimeExistsError) { - console.error( - `Directory '${destinationPath}' already exists. ` + - 'To force runtime downloading run \'npm run download_runtime -- -f\'', - ); - } else { - throw error; - } - process.exit(1); - } -} - -class RuntimeExistsError extends Error { - constructor(message) { - super(message); - this.name = 'RuntimeExistsError'; - Error.captureStackTrace(this, RuntimeExistsError); - } -} - -/** - * Download OpenVINO Runtime archive and extract it to destination directory. - * - * @async - * @function downloadRuntime - * @param {string} destinationPath - The destination directory path. - * @param {Object} [config] - The configuration object. - * @param {boolean} [config.force=false] - The flag - * to force install and replace runtime if it exists. Default is `false`. - * @param {boolean} [config.ignoreIfExists=true] - The flag - * to skip installation if it exists Default is `true`. 
- * @param {string|null} [config.proxy=null] - The proxy URL. Default is `null`. - * @returns {Promise} - * @throws {RuntimeExistsError} - */ -async function downloadRuntime( - destinationPath, - config = { force: false, ignoreIfExists: true, proxy: null }, -) { - const { version } = packageJson; - const osInfo = await getOsInfo(); - const isRuntimeDirectoryExists = await checkIfPathExists(destinationPath); - - if (isRuntimeDirectoryExists && !config.force) { - if (config.ignoreIfExists) { - console.warn( - `Directory '${destinationPath}' already exists. Skipping ` + - 'runtime downloading because \'ignoreIfExists\' flag is passed.', - ); - - return; - } - - throw new RuntimeExistsError( - `Directory '${destinationPath}' already exists. ` + - 'To force runtime downloading use \'force\' flag.', - ); - } - - const runtimeArchiveUrl = getRuntimeArchiveUrl(version, osInfo); - const tmpDir = `temp-ov-runtime-archive-${new Date().getTime()}`; - const tempDirectoryPath = path.join(os.tmpdir(), tmpDir); - - try { - const filename = path.basename(runtimeArchiveUrl); - const archiveFilePath = path.resolve(tempDirectoryPath, filename); - - await fs.mkdir(tempDirectoryPath); - - console.log('Downloading OpenVINO runtime archive...'); - await downloadFile( - runtimeArchiveUrl, - tempDirectoryPath, - filename, - config.proxy, - ); - console.log('OpenVINO runtime archive downloaded.'); - - await removeDirectory(destinationPath); - - console.log('Extracting archive...'); - await unarchive(archiveFilePath, destinationPath); - - console.log('The archive was successfully extracted.'); - } catch(error) { - console.error(`Failed to download OpenVINO runtime: ${error}.`); - throw error; - } finally { - await removeDirectory(tempDirectoryPath); - } -} - -/** - * The OS information object. - * @typedef {Object} OsInfo - * @property {NodeJS.Platform} platform - * @property {string} arch - */ - -/** - * Get information about OS. 
- * - * @async - * @function getOsInfo - * @returns {Promise} - */ -async function getOsInfo() { - const platform = os.platform(); - - if (!['win32', 'linux', 'darwin'].includes(platform)) { - throw new Error(`Platform '${platform}' is not supported.`); - } - - const arch = os.arch(); - - if (!['arm64', 'armhf', 'x64'].includes(arch)) { - throw new Error(`Architecture '${arch}' is not supported.`); - } - - if (platform === 'win32' && arch !== 'x64') { - throw new Error(`Version for windows and '${arch}' is not supported.`); - } - - return { platform, arch }; -} - -/** - * Check if path exists. - * - * @async - * @function checkIfPathExists - * @param {string} path - The path to directory or file. - * @returns {Promise} - */ -async function checkIfPathExists(path) { - try { - await fs.access(path); - - return true; - } catch(error) { - if (error.code === codeENOENT) { - return false; - } - throw error; - } -} - -/** - * Get OpenVINO runtime archive URL. - * - * @function getRuntimeArchiveUrl - * @param {string} version - Package version. - * @param {OsInfo} osInfo - The OS related data. - * @returns {string} - */ -function getRuntimeArchiveUrl(version, osInfo) { - const { - host, - package_name: packageNameTemplate, - remote_path: remotePathTemplate, - } = packageJson.binary; - const fullPathTemplate = `${remotePathTemplate}${packageNameTemplate}`; - const fullPath = fullPathTemplate - .replace(new RegExp('{version}', 'g'), version) - .replace(new RegExp('{platform}', 'g'), osInfo.platform) - .replace(new RegExp('{arch}', 'g'), osInfo.arch); - - return new URL(fullPath, host).toString(); -} - -/** - * Remove directory and its content. - * - * @async - * @function removeDirectory - * @param {string} path - The directory path. 
- * @returns {Promise} - */ -async function removeDirectory(path) { - try { - console.log(`Removing ${path}`); - await fs.rm(path, { recursive: true, force: true }); - } catch(error) { - if (error.code === codeENOENT) console.log(`Path: ${path} doesn't exist`); - - throw error; - } -} - -/** - * Download file by URL and save it to the destination path. - * - * @function downloadFile - * @param {string} url - The file URL. - * @param {string} filename - The filename of result file. - * @param {string} destination - The destination path of result file. - * @param {string} [proxy=null] - (Optional) The proxy URL. - * @returns {Promise} - */ -function downloadFile(url, destination, filename, proxy = null) { - const timeout = 5000; - const fullPath = path.resolve(destination, filename); - const file = createWriteStream(fullPath); - - if (new URL(url).protocol === 'http') - throw new Error('Http link doesn\'t support'); - - let agent; - - if (proxy) { - agent = new HttpsProxyAgent(proxy); - console.log(`Proxy agent is configured with '${proxy}'.`); - } - - return new Promise((resolve, reject) => { - file.on('error', (error) => { - reject(`Failed to open file stream: ${error}.`); - }); - - console.log(`Download file by link: ${url}`); - - const request = https.get(url, { agent }, (res) => { - const { statusCode } = res; - - if (statusCode !== 200) { - return reject(`Server returned status code ${statusCode}.`); - } - - res.pipe(file); - - file.on('finish', () => { - file.close(); - console.log(`File was successfully downloaded to '${fullPath}'.`); - resolve(); - }); - }); - - request.on('error', (error) => { - reject(`Failed to send request: ${error}.`); - }); - - request.setTimeout(timeout, () => { - request.destroy(); - reject(`Request was timed out after ${timeout} ms.`); - }); - }); -} - -/** - * Unarchive tar and tar.gz archives. - * - * @function unarchive - * @param {tarFilePath} tarFilePath - Path to archive. - * @param {dest} tarFilePath - Path where to unpack. 
- * @returns {Promise} - */ -function unarchive(tarFilePath, dest) { - return new Promise((resolve, reject) => { - createReadStream(tarFilePath) - .pipe(gunzip()) - .pipe( - tar - .extract(dest) - .on('finish', () => { - resolve(); - }) - .on('error', (err) => { - reject(err); - }), - ); - }); -} - -module.exports = { downloadRuntime, downloadFile, checkIfPathExists }; diff --git a/src/bindings/js/node/scripts/lib/binary-manager.js b/src/bindings/js/node/scripts/lib/binary-manager.js new file mode 100644 index 00000000000000..f0af78b49093ec --- /dev/null +++ b/src/bindings/js/node/scripts/lib/binary-manager.js @@ -0,0 +1,172 @@ +const os = require('node:os'); +const tar = require('tar-fs'); +const path = require('node:path'); +const gunzip = require('gunzip-maybe'); +const fs = require('node:fs/promises'); +const { createReadStream } = require('node:fs'); + +const { downloadFile, checkIfPathExists, removeDirectory } = require('./utils'); + +class BinaryManager { + constructor(packageRoot, version, binaryConfig) { + this.packageRoot = packageRoot; + this.version = version; + this.binaryConfig = binaryConfig; + } + + getPlatformLabel() { + return os.platform(); + } + + getArchLabel() { + return os.arch(); + } + + getExtension() { + return 'tar.gz'; + } + + getArchiveUrl() { + const { + host, + package_name: packageNameTemplate, + remote_path: remotePathTemplate, + } = this.binaryConfig; + const fullPathTemplate = `${remotePathTemplate}${packageNameTemplate}` + const fullPath = fullPathTemplate + .replace(new RegExp('{version}', 'g'), this.version) + .replace(new RegExp('{platform}', 'g'), this.getPlatformLabel()) + .replace(new RegExp('{arch}', 'g'), this.getArchLabel()) + .replace(new RegExp('{extension}', 'g'), this.getExtension()); + + return new URL(fullPath, host).toString(); + } + + getDestinationPath() { + const modulePath = this.binaryConfig['module_path']; + + return path.resolve(this.packageRoot, modulePath); + } + + /** + * Prepares the binary by 
downloading and extracting the OpenVINO runtime archive. + * + * @param {string} packageRoot - The root directory of the package. + * @param {string} version - The version of the binary. + * @param {Object} binaryConfig - The configuration object for the binary. + * @param {Object} options - The options for preparing the binary. + * @param {boolean} options.force - Whether to force the download if the directory already exists. + * @param {boolean} options.ignoreIfExists - Whether to ignore the download if the directory already exists. + * @param {string} [options.proxy] - The proxy to use for downloading the file. + * @throws {Error} If the directory already exists and the force option is not set. + * @throws {Error} If the download or extraction fails. + * @returns {Promise} A promise that resolves when the binary is prepared. + */ + static async prepareBinary(packageRoot, version, binaryConfig, options) { + const binaryManager = new this(packageRoot, version, binaryConfig); + const destinationPath = binaryManager.getDestinationPath(); + const isRuntimeDirectoryExists = await checkIfPathExists(destinationPath); + + if (isRuntimeDirectoryExists && !options.force) { + if (options.ignoreIfExists) { + console.warn( + `Directory '${destinationPath}' already exists. Skipping ` + + 'runtime downloading because "ignoreIfExists" flag is passed.' + ); + + return; + } + + throw new Error( + `Directory '${destinationPath}' already exists. 
` + + 'To force runtime downloading use "force" flag.', + ); + } + + const archiveUrl = binaryManager.getArchiveUrl(); + let tempDirectoryPath = null; + + try { + tempDirectoryPath = await fs.mkdtemp( + path.join(os.tmpdir(), 'temp-ov-runtime-archive-') + ); + + const filename = path.basename(archiveUrl); + + console.log('Downloading OpenVINO runtime archive...'); + const archiveFilePath = await downloadFile( + archiveUrl, + tempDirectoryPath, + filename, + options.proxy, + ) + console.log('OpenVINO runtime archive downloaded.'); + + await removeDirectory(destinationPath); + await this.unarchive(archiveFilePath, destinationPath); + console.log('The archive was successfully extracted.'); + } catch(error) { + console.error(`Failed to download OpenVINO runtime: ${error}.`); + throw error; + } finally { + if (tempDirectoryPath) await removeDirectory(tempDirectoryPath); + } + } + + /** + * Checks if the current platform and architecture are compatible. + * + * Supported platforms: 'win32', 'linux', 'darwin'. + * Supported architectures: 'arm64', 'armhf', 'x64'. + * + * If the platform or architecture is not supported, an error message is logged to the console. + * + * @returns {boolean} Returns true if the platform and architecture are compatible, otherwise false. + */ + static isCompatible() { + const missleadings = []; + const platform = os.platform(); + + if (!['win32', 'linux', 'darwin'].includes(platform)) + missleadings.push(`Platform '${platform}' is not supported.`); + + const arch = os.arch(); + + if (!['arm64', 'armhf', 'x64'].includes(arch)) + missleadings.push(`Architecture '${arch}' is not supported.`); + + if (platform === 'win32' && arch !== 'x64') + missleadings.push(`Version for windows and '${arch}' is not supported.`); + + if (missleadings.length) { + console.error(missleadings.join(' ')); + return false; + } + + return true; + } + + /** + * Unarchive tar and tar.gz archives. 
+ * + * @function unarchive + * @param {string} archivePath - Path to archive. + * @param {string} dest - Path where to unpack. + * @returns {Promise} + */ + static unarchive(archivePath, dest) { + return new Promise((resolve, reject) => { + createReadStream(archivePath) + .pipe(gunzip()) + .pipe(tar.extract(dest) + .on('finish', () => { + resolve(); + }).on('error', (err) => { + reject(err); + }), + ); + }); + } +} + +module.exports = BinaryManager; diff --git a/src/bindings/js/node/scripts/lib/utils.js b/src/bindings/js/node/scripts/lib/utils.js new file mode 100644 index 00000000000000..9658ec504fa0d9 --- /dev/null +++ b/src/bindings/js/node/scripts/lib/utils.js @@ -0,0 +1,115 @@ +const path = require('node:path'); +const https = require('node:https'); +const fs = require('node:fs/promises'); +const { createWriteStream } = require('node:fs'); + +const { HttpsProxyAgent } = require('https-proxy-agent'); + +const codeENOENT = 'ENOENT'; + +module.exports = { + removeDirectory, + checkIfPathExists, + downloadFile, +}; + +/** + * Remove directory and its content. + * + * @async + * @function removeDirectory + * @param {string} path - The directory path. + * @returns {Promise} + */ +async function removeDirectory(path) { + try { + console.log(`Removing ${path}`); + await fs.rm(path, { recursive: true }); + } catch (error) { + if (error.code !== codeENOENT) throw error; + + console.warn(`Path: ${path} doesn't exist`); + } +} + +/** + * Check if path exists. + * + * @async + * @function checkIfPathExists + * @param {string} path - The path to directory or file. + * @returns {Promise} + */ +async function checkIfPathExists(path) { + try { + await fs.access(path); + return true; + } catch (error) { + if (error.code === codeENOENT) { + return false; + } + throw error; + } +} + +/** + * Download file by URL and save it to the destination path. + * + * @function downloadFile + * @param {string} url - The file URL. + * @param {string} filename - The filename of result file. 
+ * @param {string} destination - The destination path of result file. + * @param {string} [proxy=null] - (Optional) The proxy URL. + * @returns {Promise} - Path to downloaded file. + */ +function downloadFile(url, destination, filename, proxy = null) { + console.log(`Downloading file by link: ${url} to ${destination}` + + `with filename: ${filename}`); + + const timeout = 5000; + const fullPath = path.resolve(destination, filename); + const file = createWriteStream(fullPath); + + if (new URL(url).protocol === 'http:') + throw new Error('Http link doesn\'t support'); + + let agent; + + if (proxy) { + agent = new HttpsProxyAgent(proxy); + console.log(`Proxy agent is configured with '${proxy}'.`); + } + + return new Promise((resolve, reject) => { + file.on('error', (error) => { + reject(`Failed to open file stream: ${error}.`); + }); + + console.log(`Download file by link: ${url}`); + + const request = https.get(url, { agent }, (res) => { + const { statusCode } = res; + + if (statusCode !== 200) { + return reject(`Server returned status code ${statusCode}.`); + } + + res.pipe(file); + + file.on('finish', () => { + file.close(); + console.log(`File was successfully downloaded to '${fullPath}'.`); + resolve(fullPath); + }); + }); + + request.on('error', (error) => { + reject(`Failed to send request: ${error}.`); + }); + + request.setTimeout(timeout, () => { + request.destroy(); + reject(`Request was timed out after ${timeout} ms.`); + }); + }); +} diff --git a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js index cfa5fd27b0fa4e..58cc6b3b3cf450 100644 --- a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js +++ b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js @@ -1,11 +1,39 @@ const { app } = require('electron'); const { addon: ov } = require('openvino-node'); -app.whenReady().then(() => { - console.log('Creating OpenVINO Runtime Core'); - // eslint-disable-next-line 
/**
 * Entry point of the demo Electron application.
 *
 * Waits until Electron is ready, then creates an OpenVINO Core, reads and
 * compiles the test model for CPU, runs one asynchronous inference on a
 * randomly filled [1, 3, 32, 32] f32 tensor and logs every step. The
 * application exits with code 0 when all steps succeed and with code 1 when
 * any of them throws.
 *
 * @async
 * @function main
 * @returns {Promise<void>}
 */
async function main() {
  await app.whenReady();

  // The exit code is decided first and app.exit() is called exactly once,
  // so a failure code cannot be followed by a second app.exit(0) call.
  let exitCode = 0;

  try {
    console.log('Creating OpenVINO Runtime Core');
    const core = new ov.Core();
    console.log('Created OpenVINO Runtime Core');

    const model = await core.readModel(pathToModel);
    console.log('Model read successfully:', model);
    const compiledModel = await core.compileModel(model, 'CPU');
    const inferRequest = compiledModel.createInferRequest();
    console.log('Infer request created:', inferRequest);

    // 3072 = 1 * 3 * 32 * 32 elements, matching the tensor shape below.
    const tensorData = Float32Array.from(
      { length: 3072 },
      () => Math.random() + epsilon,
    );
    const tensor = new ov.Tensor(ov.element.f32, [1, 3, 32, 32], tensorData);
    console.log('Tensor created:', tensor);

    const result = await inferRequest.inferAsync([tensor]);
    console.log('Infer request result:', result);
  } catch (error) {
    console.error('Error:', error);
    exitCode = 1;
  }

  app.exit(exitCode);
}
(error) => { - if (error) { - console.error(`exec error: ${error}`); - - return done(error); - } - - done(); - }, - ); + before(async () => { + await downloadTestModel(testModels.testModelFP32); + await execPromise('cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project'); }); it('should install dependencies', (done) => { @@ -37,7 +30,7 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() { }); it('should run electron package and verify output', (done) => { - exec('cd demo-electron-app-project && npm start', (error, stdout) => { + exec(`cd demo-electron-app-project && npm start`, (error, stdout) => { if (error) { console.error(`exec error: ${error}`); @@ -48,6 +41,14 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() { stdout.includes('Created OpenVINO Runtime Core'), 'Check that openvino-node operates fine', ); + assert( + stdout.includes('Model read successfully: ModelWrap {}'), + 'Check that model is read successfully', + ); + assert( + stdout.includes('Infer request result: { fc_out: TensorWrap {} }'), + 'Check that infer request result is successful', + ); done(); }); }); diff --git a/src/bindings/js/node/tests/unit/utils.js b/src/bindings/js/node/tests/unit/utils.js index c2089e30b4cdc8..456f87983dba20 100644 --- a/src/bindings/js/node/tests/unit/utils.js +++ b/src/bindings/js/node/tests/unit/utils.js @@ -7,7 +7,7 @@ const fs = require('node:fs/promises'); const { downloadFile, checkIfPathExists, -} = require('../../scripts/download_runtime'); +} = require('../../scripts/lib/utils'); const modelDir = 'tests/unit/test_models/'; const testModels = { diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index bb3d708a0ca23d..a0fbf982105ad6 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -1,5 +1,5 @@ # used in multiple components -numpy>=1.16.6,<2.1.0 # Python bindings, frontends +numpy>=1.16.6,<2.2.0 # Python bindings, 
frontends # pytest pytest>=5.0,<8.4 @@ -8,9 +8,9 @@ pytest-html==4.1.1 pytest-timeout==2.3.1 # Python bindings -py>=1.9.0 +build<1.3 pygments>=2.8.1 -setuptools>=65.6.1,<74.1.0 +setuptools>=65.6.1,<75.3.0 sympy>=1.10 wheel>=0.38.1 patchelf<=0.17.2.1 @@ -18,8 +18,8 @@ patchelf<=0.17.2.1 # Frontends h5py>=3.1.0,<3.13.0 docopt~=0.6.2 -paddlepaddle==2.6.0 +paddlepaddle==2.6.2 tensorflow>=1.15.5,<2.18.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 -onnx==1.16.0 +onnx==1.17.0 diff --git a/src/bindings/python/docs/build.md b/src/bindings/python/docs/build.md index d0ab2e5f5f4e57..f824d9ccb8d82a 100644 --- a/src/bindings/python/docs/build.md +++ b/src/bindings/python/docs/build.md @@ -1,9 +1,18 @@ # Building the OpenVINO™ Python API **Refer to ["How to build OpenVINO" in OpenVINO™ developer documentation](../../../../docs/dev/build.md) for general building instructions.** - For each platform, you can build and install the API as a part of OpenVINO™ Toolkit or as a Python wheel. -A Python wheel is a portable package that allows you to install OpenVINO™ in either your Python distribution or a dedicated virtual environment. + +## Using Python Wheels for OpenVINO™ +Wheels are portable Python packages that are ready to install upon download. They are the commonly used binary distributions in Python as they avoid the compiling of extension modules and associated dependency issues on the user end. As a result, wheels installation is faster and smoother, simplifying the process for both developers and users. The ```.whl``` format is the default preference for ```pip``` when you run a ```pip install``` command. + +OpenVINO wheels for various platforms are available on [PyPI](https://pypi.org/project/openvino/#files), enabling you to install OpenVINO™ in your Python distribution or a dedicated virtual environment. 
+ +OpenVINO builds two different wheels with separate ```setup.py``` files for [```openvino```](../wheel/setup.py) and [```openvino-dev```](../../../../tools/openvino_dev/setup.py). To build the wheels while building the project from source, your ```cmake``` command should include ```-DENABLE_PYTHON=ON``` and ```-DENABLE_WHEEL=ON```. Once built, the wheels can be found under ```openvino_install_dir/tools```. + +While wheels make installation easier, using wheels for development offers less customization than building from source and exporting ```PYTHONPATH``` and other environment variables to OpenVINO directories. Wheels are usually provided for specific packaged versions and might not contain the most recent changes that are available if you choose to clone the repository and build it yourself. + +To learn more about wheels and their use cases, check out the article [What Are Python Wheels and Why Should You Care?](https://realpython.com/python-wheels/). ## Virtual environments @@ -38,7 +47,9 @@ OpenVINO can be built based on specific virtual environments such as [venv](http 5. 
Install developer requirements for OpenVINO™ Python API while inside virtual environment: ```shell - cd + git clone https://github.com/openvinotoolkit/openvino.git + cd openvino + git submodule update --init --recursive pip install -r src/bindings/python/requirements.txt pip install -r src/bindings/python/requirements_test.txt ``` diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index e311c6ed6438db..a2d63161fe764c 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ -numpy>=1.16.6,<2.1.0 +numpy>=1.16.6,<2.2.0 openvino-telemetry>=2023.2.1 packaging diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index 9d16cbe5c9b21a..1aa2ff24b1b948 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -7,7 +7,7 @@ flake8-annotations-complexity<=0.0.8 flake8-broken-line<=1.0.0 flake8-bugbear<=24.8.19 flake8-class-attributes-order<=0.1.3 -flake8-comprehensions<=3.15.0 +flake8-comprehensions<=3.16.0 flake8-debugger<=4.1.2 flake8-docstrings<=1.7.0 flake8-eradicate<=1.5.0 @@ -30,7 +30,6 @@ pytest-forked; sys_platform != 'win32' pytest-xdist pytest-html pytest -py radon retrying tox diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py index 3fe1ba465dfd1f..a1c6aecc45d421 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -77,7 +77,8 @@ def patched_forward_sym(self, *args, **kwargs): unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width) # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation - unpacked_weights = unpacked_weights.to(dtype) * self.scales + unpacked_weights = (unpacked_weights.to(torch.int8) - torch.tensor(8, dtype=torch.int8)) + unpacked_weights = unpacked_weights.to(dtype) * 
self.scales unpacked_weights = unpacked_weights.view(-1, self.width) out = x @ unpacked_weights diff --git a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py index 908a7f8660a94c..55001180cba3fb 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py @@ -4,6 +4,7 @@ # flake8: noqa # mypy: ignore-errors +import functools import logging import torch from openvino.frontend.pytorch import ModuleExtension @@ -11,16 +12,6 @@ log = logging.getLogger(__name__) -class no_jit_trace: - def __enter__(self): - self.state = torch._C._get_tracing_state() - torch._C._set_tracing_state(None) - - def __exit__(self, *args): - torch._C._set_tracing_state(self.state) - self.state = None - - def patch_model(model, module_extensions, orig_forward_name): def module_patcher(m, name): extension = None @@ -32,47 +23,44 @@ def module_patcher(m, name): extension = module_extensions[name] if extension: - # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module. + log.debug("Patching module %s", m) + # The Trampoline class is instantiated for every module replacement, so we can use + # class members individually for each module. 
class Trampoline(torch.autograd.Function): + # required to be saved in class target_extension = extension - original_module = m - stashed_args = None - stashed_kwargs = None @staticmethod @torch.jit.ignore - def forward(*args, **kwargs): - with no_jit_trace(): - # `module` is going to be passed to a user-defined function `evaluate` - # `module` is patched: forward function was replaced, and we are actually in this patched function right in this code - # if we pass `module` as-is to the user code below, and it happens to call forward it will lead to infinite recursion or fail - # so we need to temporary patch the module back to the original forward and then return it back again - # stash the current forward to be able to return it back - patched_forward = m.forward - # set original forward for the module - m.forward = getattr(m, orig_forward_name) - # call user code - results = extension.evaluate( - m, *Trampoline.stashed_args, **Trampoline.stashed_kwargs) # call user code - m.forward = patched_forward # return patched forward back - return results + def forward(ctx, *args, **kwargs): + # Temporarily restore the original forward function of `module` to avoid + # recursion issues in `evaluate`, then revert it back. 
+ patched_forward = m.forward + # set original forward for the module + m.forward = getattr(m, orig_forward_name) + # call user code + results = extension.evaluate(m, *args, **kwargs) + m.forward = patched_forward # return patched forward back + return results def new_forward(*args, **kwargs): - Trampoline.stashed_args = args - Trampoline.stashed_kwargs = kwargs return extension.convert(m, Trampoline.apply, *args, **kwargs) + + # make signature of new_forward same as of forward + new_forward = functools.wraps(m.forward)(new_forward) setattr(m, orig_forward_name, m.forward) m.forward = new_forward for name, m in model.named_modules(): if hasattr(m, orig_forward_name): - # already patched, skipping with a warning because it is unexpected - log.warning("Unexpectedly found already patched module %s while applying " - "ModuleExtension during PyTorch model conversion. " - "Result of the conversion maybe broken. Depending on the exact issue " - "it may lead to broken original model.", name) + # already patched, skipping. It may happen when patching applied for same module twice + log.debug("Unexpectedly found already patched module %s while applying " + "ModuleExtension during PyTorch model conversion. " + "Result of the conversion maybe broken. 
Depending on the exact issue " + "it may lead to broken original model.", name) continue + module_patcher(m, name) @@ -97,27 +85,38 @@ def __make_16bit_traceable(model: torch.nn.Module): extensions = { torch.nn.Linear: ModuleExtension( torch.nn.Linear, "ov_ext::linear", + convert=lambda module, target_op, *args, **kwargs: target_op(args[0], + module.weight, + module.bias), evaluate=lambda module, *args, **kwargs: torch.full( - list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32), - convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)), + list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32)), torch.nn.Embedding: ModuleExtension( torch.nn.Embedding, "ov_ext::embedding", + convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, + args[0], + module.padding_idx, + module.scale_grad_by_freq, + module.sparse), evaluate=lambda module, *args, **kwargs: torch.full( - list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32), - convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)), + list(args[1].shape) + [module.embedding_dim], 0.5, dtype=torch.float32)), } try: from transformers.pytorch_utils import Conv1D extensions[Conv1D] = ModuleExtension( Conv1D, "ov_ext::conv1d", + convert=lambda module, target_op, *args, **kwargs: target_op(args[0], + module.weight, + module.bias), evaluate=lambda module, *args, **kwargs: torch.full( - list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32), - convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)) - except: + list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32)) + except ImportError: pass patch_model(model, extensions, "_openvino_module_extension_patch_orig_forward") + dtype_to_patch = [torch.float16, torch.bfloat16] for _, module in 
@fake_tensor_unsupported
def openvino(subgraph, example_inputs, options=None):
    """Compile ``subgraph`` with the OpenVINO backend.

    When ``_get_aot_autograd(options)`` is truthy, ``options`` is stored in the
    module-level ``openvino_options`` and the graph is compiled through
    ``aot_autograd`` with ``fx_openvino`` as both forward and backward compiler.
    Otherwise ``fx_openvino`` is called directly.
    """
    global openvino_options

    use_aot_autograd = _get_aot_autograd(options)
    if not use_aot_autograd:
        return fx_openvino(subgraph, example_inputs, options)

    # Keep the options around so that fx_openvino can pick them up later.
    openvino_options = options
    decomposition_ops = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
    aot_compiler = aot_autograd(
        fw_compiler=fx_openvino,
        bw_compiler=fx_openvino,
        decompositions=get_decompositions(decomposition_ops),
    )
    return aot_compiler(subgraph, example_inputs)
def fx_openvino(subgraph, example_inputs, options=None): openvino_model_caching = _get_model_caching(options) if openvino_model_caching is not None and openvino_model_caching: # Create a hash to be used for caching - model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest() + model_hash_str = sha256(subgraph.code.encode("utf-8")).hexdigest() executor_parameters = {"model_hash_str": model_hash_str} # Check if the model was fully supported and already cached example_inputs.reverse() @@ -79,15 +83,17 @@ def fx_openvino(subgraph, example_inputs, options=None): if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"): # Model is fully supported and already cached. Run the cached OV model directly. compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, options, *example_inputs) + def _call(*args): res = execute_cached(compiled_model, *args) return res + return _call if inputs_reversed: example_inputs.reverse() preserved_arg_indices = [] - if (_get_aot_autograd(options)): + if _get_aot_autograd(options): if tracing_context := torch._guards.TracingContext.try_get(): fw_metadata = tracing_context.fw_metadata params_flat = tracing_context.params_flat @@ -97,6 +103,7 @@ def _call(*args): model = subgraph else: from torch._subclasses.fake_tensor import FakeTensorMode + decompositions = _get_decompositions(options) + get_inf_decomposition_list() with FakeTensorMode(allow_non_fake_inputs=True): model = make_fx(subgraph, decomposition_table=get_decompositions(decompositions))(*example_inputs) @@ -106,26 +113,27 @@ def _call(*args): partitioner = Partitioner(options) compiled_model = partitioner.make_partitions(model, options) - if executor_parameters is not None and 'model_hash_str' in executor_parameters: + if executor_parameters is not None and "model_hash_str" in executor_parameters: # Check if the model is fully supported. 
def capture_nncf_patterns(self, graph_module: GraphModule) -> None:
    """Enable the ops of a stack / bitwise_and / bitwise_right_shift pattern.

    Registers the pattern op types, then walks ``graph_module.graph.nodes``.
    For every ``call_function`` node whose target is ``aten.stack.default``
    it runs ``self.check_pattern`` and, on a match, enables each op collected
    in ``enabled_ops`` via ``self.supported_ops.enable_by_name``.

    :param graph_module: graph whose nodes are scanned for the pattern.
    """
    # NOTE(review): `PatternNode` is assigned without being called, so
    # `const_node`, `bitwise_right_shift_node`, `bitwise_and_node` and
    # `stack_node` are four names for the same object, and every
    # `op_types[...]` assignment below writes into the same
    # `PatternNode.op_types` mapping. Presumably separate instances were
    # intended -- confirm against the PatternNode definition.
    const_node = PatternNode
    const_node.op_types["get_attr"] = None
    bitwise_right_shift_node = PatternNode
    bitwise_right_shift_node.op_types["call_function:aten.bitwise_right_shift.Tensor_Scalar"] = [const_node]
    bitwise_and_node = PatternNode
    bitwise_and_node.op_types["call_function:aten.bitwise_and.Scalar"] = [const_node,]
    stack_node = PatternNode
    stack_node.op_types["call_function:aten.stack.default"] = [bitwise_and_node, bitwise_right_shift_node]

    for node in graph_module.graph.nodes:
        # Only `aten.stack.default` calls are used as the pattern root.
        if str(node.op) == "call_function" and str(node.target) == "aten.stack.default":
            enabled_ops = []
            # NOTE(review): the root passed here is `bitwise_and_node`, not
            # `stack_node` (which is otherwise unused). Because of the
            # aliasing noted above both names refer to the same object today;
            # verify the intent if the nodes are ever made distinct.
            pattern_match = self.check_pattern(node, bitwise_and_node, enabled_ops)
            if pattern_match:
                for pattern_op in enabled_ops:
                    self.supported_ops.enable_by_name(pattern_op)
torch_t = torch_t.contiguous() if torch_t.dtype == torch.bfloat16: # reinterpret bfloat16 data as float16 to allow conversion to numpy diff --git a/src/bindings/python/src/openvino/runtime/opset1/__init__.py b/src/bindings/python/src/openvino/runtime/opset1/__init__.py index 5bfa42f43f26b9..ca7e1aef385b2a 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset1/__init__.py @@ -54,7 +54,6 @@ from openvino.runtime.opset1.ops import logical_xor from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset1/ops.py b/src/bindings/python/src/openvino/runtime/opset1/ops.py index c100a6c2db2cb4..54f32d404336d4 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset1/ops.py @@ -1532,95 +1532,6 @@ def lstm_cell( return _get_node_factory_opset1().create("LSTMCell", node_inputs, attributes) -@deprecated(version="2025.0", message="Use lstm_sequence from opset 5") -@nameable_op -def lstm_sequence( - X: NodeInput, - initial_hidden_state: NodeInput, - initial_cell_state: NodeInput, - sequence_lengths: NodeInput, - W: NodeInput, - R: NodeInput, - B: NodeInput, - hidden_size: int, - direction: str, - activations: Optional[List[str]] = None, - activations_alpha: Optional[List[float]] = None, - activations_beta: Optional[List[float]] = None, - clip: float = 0.0, - name: Optional[str] = None, -) -> Node: - """Return a node which performs LSTMSequence operation. - - :param X: The input tensor. Shape: [batch_size, seq_length, input_size]. - :param initial_hidden_state: The hidden state tensor. - Shape: [batch_size, num_directions, hidden_size]. 
- :param initial_cell_state: The cell state tensor. - Shape: [batch_size, num_directions, hidden_size]. - :param sequence_lengths: Specifies real sequence lengths for each batch element. - Shape: [batch_size]. Integer type. - :param W: Tensor with weights for matrix multiplication operation with input portion of data. - Shape: [num_directions, 4*hidden_size, input_size]. - :param R: The tensor with weights for matrix multiplication operation with hidden state. - Shape: [num_directions, 4*hidden_size, hidden_size]. - :param B: The tensor with biases. - Shape: [num_directions, 4*hidden_size]. - :param hidden_size: Specifies hidden state size. - :param direction: Specifies if the RNN is forward, reverse, or bidirectional. - :param activations: The list of three activation functions for gates. - :param activations_alpha: The list of alpha parameters for activation functions. - :param activations_beta: The list of beta parameters for activation functions. - :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. - :param name: An optional name of the output node. - - :return: The new node represents LSTMSequence. Node outputs count: 3. 
- """ - if activations is None: - activations = ["sigmoid", "tanh", "tanh"] - if activations_alpha is None: - activations_alpha = [] - if activations_beta is None: - activations_beta = [] - - node_inputs = as_nodes( - X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - name=name, - ) - - # P - nGraph additional input, no such input in the OV spec - peepholes_count = 3 # nGraph default - if direction.lower() == "bidirectional": - num_directions = 2 - else: - num_directions = 1 - peepholes_shape = [num_directions, peepholes_count * hidden_size] - peepholes_array = np.zeros(peepholes_shape) # nGraph default - data_dtype = get_dtype(node_inputs[0].get_output_element_type(0)) - default_p = make_constant_node(peepholes_array, dtype=data_dtype) - node_inputs.append(default_p) - - weights_format = "fico" # OV LSTMWeightsFormat, no such attribute in the OV spec - input_forget = False # nGraph default, no such attribute in the OV spec - - attributes = { - "hidden_size": hidden_size, - "direction": direction.lower(), - "activations": activations, - "activations_alpha": activations_alpha, - "activations_beta": activations_beta, - "clip": clip, - "weights_format": weights_format, - "input_forget": input_forget, - } - return _get_node_factory_opset1().create("LSTMSequence", node_inputs, attributes) - - @nameable_op def matmul( data_a: NodeInput, diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/runtime/opset13/ops.py index cb201d3d4263dd..a624ffb4f79873 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset13/ops.py @@ -15,7 +15,7 @@ from openvino.runtime.op import Constant, Result from openvino.runtime.opset1 import convert_like from openvino.runtime.opset_utils import _get_node_factory -from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op +from openvino.runtime.utils.decorators import binary_op, 
nameable_op, unary_op, overloading from openvino.runtime.utils.types import ( NumericData, NodeInput, @@ -271,7 +271,7 @@ def scaled_dot_product_attention( return _get_node_factory_opset13().create("ScaledDotProductAttention", inputs, attributes) -@singledispatch +@overloading(Union[NumericData, np.number, bool, np.bool_, list], Union[NumericType, Type], Optional[str], bool) # type: ignore @nameable_op def constant( value: Union[NumericData, np.number, bool, np.bool_, list], @@ -339,9 +339,9 @@ def display_shared_memory_warning(warning_message: str) -> None: return Constant(_value, shared_memory=_shared_memory) -@constant.register +@overloading(Tensor, bool, Optional[str]) # type: ignore @nameable_op -def _( +def constant( # noqa: F811 tensor: Tensor, shared_memory: bool = False, name: Optional[str] = None, diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 96643a7e93d596..c4dd48d9087ae1 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -2,10 +2,7 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Inlcudes new operators added in Opset15 - -# TODO (ticket 138273): Add previous opset operators at the end of opset15 development -from openvino.runtime.opset1.ops import parameter +# New operations added in Opset15 from openvino.runtime.opset15.ops import col2im from openvino.runtime.opset15.ops import embedding_bag_offsets from openvino.runtime.opset15.ops import embedding_bag_packed @@ -16,3 +13,192 @@ from openvino.runtime.opset15.ops import bitwise_left_shift from openvino.runtime.opset15.ops import bitwise_right_shift from openvino.runtime.opset15.ops import slice_scatter + +# Operators from previous opsets +from openvino.runtime.opset1.ops import absolute +from openvino.runtime.opset1.ops import absolute as abs +from openvino.runtime.opset1.ops 
import acos +from openvino.runtime.opset4.ops import acosh +from openvino.runtime.opset8.ops import adaptive_avg_pool +from openvino.runtime.opset8.ops import adaptive_max_pool +from openvino.runtime.opset1.ops import add +from openvino.runtime.opset1.ops import asin +from openvino.runtime.opset4.ops import asinh +from openvino.runtime.opset6.ops import assign +from openvino.runtime.opset1.ops import atan +from openvino.runtime.opset4.ops import atanh +from openvino.runtime.opset14.ops import avg_pool +from openvino.runtime.opset5.ops import batch_norm_inference +from openvino.runtime.opset2.ops import batch_to_space +from openvino.runtime.opset1.ops import binary_convolution +from openvino.runtime.opset13.ops import bitwise_and +from openvino.runtime.opset13.ops import bitwise_not +from openvino.runtime.opset13.ops import bitwise_or +from openvino.runtime.opset13.ops import bitwise_xor +from openvino.runtime.opset3.ops import broadcast +from openvino.runtime.opset3.ops import bucketize +from openvino.runtime.opset1.ops import ceiling +from openvino.runtime.opset1.ops import ceiling as ceil +from openvino.runtime.opset1.ops import clamp +from openvino.runtime.opset1.ops import concat +from openvino.runtime.opset13.ops import constant +from openvino.runtime.opset1.ops import convert +from openvino.runtime.opset1.ops import convert_like +from openvino.runtime.opset14.ops import convert_promote_types +from openvino.runtime.opset1.ops import convolution +from openvino.runtime.opset1.ops import convolution_backprop_data +from openvino.runtime.opset1.ops import cos +from openvino.runtime.opset1.ops import cosh +from openvino.runtime.opset1.ops import ctc_greedy_decoder +from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.runtime.opset4.ops import ctc_loss +from openvino.runtime.opset3.ops import cum_sum +from openvino.runtime.opset3.ops import cum_sum as cumsum +from openvino.runtime.opset8.ops import deformable_convolution +from 
openvino.runtime.opset1.ops import deformable_psroi_pooling +from openvino.runtime.opset1.ops import depth_to_space +from openvino.runtime.opset8.ops import detection_output +from openvino.runtime.opset7.ops import dft +from openvino.runtime.opset1.ops import divide +from openvino.runtime.opset7.ops import einsum +from openvino.runtime.opset1.ops import elu +from openvino.runtime.opset3.ops import embedding_bag_offsets_sum +from openvino.runtime.opset3.ops import embedding_bag_packed_sum +from openvino.runtime.opset3.ops import embedding_segments_sum +from openvino.runtime.opset3.ops import extract_image_patches +from openvino.runtime.opset1.ops import equal +from openvino.runtime.opset1.ops import erf +from openvino.runtime.opset1.ops import exp +from openvino.runtime.opset9.ops import eye +from openvino.runtime.opset13.ops import fake_convert +from openvino.runtime.opset13.ops import fake_quantize +from openvino.runtime.opset1.ops import floor +from openvino.runtime.opset1.ops import floor_mod +from openvino.runtime.opset8.ops import gather +from openvino.runtime.opset6.ops import gather_elements +from openvino.runtime.opset8.ops import gather_nd +from openvino.runtime.opset1.ops import gather_tree +from openvino.runtime.opset7.ops import gelu +from openvino.runtime.opset9.ops import generate_proposals +from openvino.runtime.opset1.ops import greater +from openvino.runtime.opset1.ops import greater_equal +from openvino.runtime.opset9.ops import grid_sample +from openvino.runtime.opset1.ops import grn +from openvino.runtime.opset1.ops import group_convolution +from openvino.runtime.opset1.ops import group_convolution_backprop_data +from openvino.runtime.opset12.ops import group_normalization +from openvino.runtime.opset3.ops import gru_cell +from openvino.runtime.opset5.ops import gru_sequence +from openvino.runtime.opset1.ops import hard_sigmoid +from openvino.runtime.opset5.ops import hsigmoid +from openvino.runtime.opset4.ops import hswish +from 
openvino.runtime.opset7.ops import idft +from openvino.runtime.opset8.ops import if_op +from openvino.runtime.opset11.ops import interpolate +from openvino.runtime.opset14.ops import inverse +from openvino.runtime.opset9.ops import irdft +from openvino.runtime.opset10.ops import is_finite +from openvino.runtime.opset10.ops import is_inf +from openvino.runtime.opset10.ops import is_nan +from openvino.runtime.opset8.ops import i420_to_bgr +from openvino.runtime.opset8.ops import i420_to_rgb +from openvino.runtime.opset1.ops import less +from openvino.runtime.opset1.ops import less_equal +from openvino.runtime.opset1.ops import log +from openvino.runtime.opset1.ops import logical_and +from openvino.runtime.opset1.ops import logical_not +from openvino.runtime.opset1.ops import logical_or +from openvino.runtime.opset1.ops import logical_xor +from openvino.runtime.opset5.ops import log_softmax +from openvino.runtime.opset5.ops import loop +from openvino.runtime.opset1.ops import lrn +from openvino.runtime.opset4.ops import lstm_cell +from openvino.runtime.opset5.ops import lstm_sequence +from openvino.runtime.opset1.ops import matmul +from openvino.runtime.opset8.ops import matrix_nms +from openvino.runtime.opset14.ops import max_pool +from openvino.runtime.opset1.ops import maximum +from openvino.runtime.opset1.ops import minimum +from openvino.runtime.opset4.ops import mish +from openvino.runtime.opset1.ops import mod +from openvino.runtime.opset9.ops import multiclass_nms +from openvino.runtime.opset13.ops import multinomial +from openvino.runtime.opset1.ops import multiply +from openvino.runtime.opset6.ops import mvn +from openvino.runtime.opset1.ops import negative +from openvino.runtime.opset13.ops import nms_rotated +from openvino.runtime.opset9.ops import non_max_suppression +from openvino.runtime.opset3.ops import non_zero +from openvino.runtime.opset1.ops import normalize_l2 +from openvino.runtime.opset1.ops import not_equal +from openvino.runtime.opset8.ops 
import nv12_to_bgr +from openvino.runtime.opset8.ops import nv12_to_rgb +from openvino.runtime.opset1.ops import one_hot +from openvino.runtime.opset12.ops import pad +from openvino.runtime.opset1.ops import parameter +from openvino.runtime.opset1.ops import power +from openvino.runtime.opset1.ops import prelu +from openvino.runtime.opset8.ops import prior_box +from openvino.runtime.opset1.ops import prior_box_clustered +from openvino.runtime.opset1.ops import psroi_pooling +from openvino.runtime.opset4.ops import proposal +from openvino.runtime.opset4.ops import range +from openvino.runtime.opset8.ops import random_uniform +from openvino.runtime.opset9.ops import rdft +from openvino.runtime.opset6.ops import read_value +from openvino.runtime.opset4.ops import reduce_l1 +from openvino.runtime.opset4.ops import reduce_l2 +from openvino.runtime.opset1.ops import reduce_logical_and +from openvino.runtime.opset1.ops import reduce_logical_or +from openvino.runtime.opset1.ops import reduce_max +from openvino.runtime.opset1.ops import reduce_mean +from openvino.runtime.opset1.ops import reduce_min +from openvino.runtime.opset1.ops import reduce_prod +from openvino.runtime.opset1.ops import reduce_sum +from openvino.runtime.opset1.ops import region_yolo +from openvino.runtime.opset2.ops import reorg_yolo +from openvino.runtime.opset1.ops import relu +from openvino.runtime.opset1.ops import reshape +from openvino.runtime.opset13.ops import result +from openvino.runtime.opset1.ops import reverse_sequence +from openvino.runtime.opset3.ops import rnn_cell +from openvino.runtime.opset5.ops import rnn_sequence +from openvino.runtime.opset9.ops import roi_align +from openvino.runtime.opset2.ops import roi_pooling +from openvino.runtime.opset7.ops import roll +from openvino.runtime.opset5.ops import round +from openvino.runtime.opset13.ops import scaled_dot_product_attention +from openvino.runtime.opset12.ops import scatter_elements_update +from openvino.runtime.opset3.ops import 
scatter_update +from openvino.runtime.opset15.ops import search_sorted +from openvino.runtime.opset1.ops import select +from openvino.runtime.opset1.ops import selu +from openvino.runtime.opset3.ops import shape_of +from openvino.runtime.opset3.ops import shuffle_channels +from openvino.runtime.opset1.ops import sigmoid +from openvino.runtime.opset1.ops import sign +from openvino.runtime.opset1.ops import sin +from openvino.runtime.opset1.ops import sinh +from openvino.runtime.opset8.ops import slice +from openvino.runtime.opset8.ops import softmax +from openvino.runtime.opset4.ops import softplus +from openvino.runtime.opset9.ops import softsign +from openvino.runtime.opset2.ops import space_to_batch +from openvino.runtime.opset1.ops import space_to_depth +from openvino.runtime.opset1.ops import split +from openvino.runtime.opset1.ops import sqrt +from openvino.runtime.opset1.ops import squared_difference +from openvino.runtime.opset15.ops import squeeze +from openvino.runtime.opset15.ops import stft +from openvino.runtime.opset1.ops import strided_slice +from openvino.runtime.opset1.ops import subtract +from openvino.runtime.opset4.ops import swish +from openvino.runtime.opset1.ops import tan +from openvino.runtime.opset1.ops import tanh +from openvino.runtime.opset1.ops import tensor_iterator +from openvino.runtime.opset1.ops import tile +from openvino.runtime.opset11.ops import topk +from openvino.runtime.opset1.ops import transpose +from openvino.runtime.opset10.ops import unique +from openvino.runtime.opset1.ops import unsqueeze +from openvino.runtime.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index 116f63726bfeb6..93aacb29572340 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -303,3 +303,87 @@ def slice_scatter( inputs = as_nodes(data, updates, start, stop, 
step, axes, name=name)
     return _get_node_factory_opset15().create("SliceScatter", inputs)
+
+
+@nameable_op
+def stft(
+    data: NodeInput,
+    window: NodeInput,
+    frame_size: NodeInput,
+    frame_step: NodeInput,
+    transpose_frames: bool,
+    name: Optional[str] = None,
+) -> Node:
+    """Return a node which generates STFT operation.
+
+    :param data: The node providing input data.
+    :param window: The node providing window data.
+    :param frame_size: The node with scalar value representing the size of Fourier Transform.
+    :param frame_step: The distance (number of samples) between successive window frames.
+    :param transpose_frames: Flag to set output shape layout. If true the `frames` dimension is at out_shape[2],
+                             otherwise it is at out_shape[1].
+    :param name: The optional name for the created output node.
+    :return: The new node performing STFT operation.
+    """
+    inputs = as_nodes(data, window, frame_size, frame_step, name=name)
+    return _get_node_factory_opset15().create("STFT", inputs, {"transpose_frames": transpose_frames})
+
+
+@nameable_op
+def search_sorted(
+    sorted_sequence: NodeInput,
+    values: NodeInput,
+    right_mode: bool = False,
+    name: Optional[str] = None,
+) -> Node:
+    """Return a node which generates SearchSorted operation.
+
+    :param sorted_sequence: The node providing sorted sequence to search in.
+    :param values: The node providing searched values.
+    :param right_mode: If set to False, return the first suitable index that is found for given value.
+                       If set to True, return the last such index. Defaults to False.
+    :param name: The optional name for the created output node.
+    :return: The new node performing SearchSorted operation.
+    """
+    inputs = as_nodes(sorted_sequence, values, name=name)
+    attributes = {"right_mode": right_mode}
+    return _get_node_factory_opset15().create("SearchSorted", inputs, attributes)
+
+
+@nameable_op
+def squeeze(
+    data: NodeInput,
+    axes: Optional[NodeInput] = None,
+    allow_axis_skip: bool = False,
+    name: Optional[str] = None,
+) -> Node:
+    """Perform squeeze operation on input tensor.
+
+    :param data: The node with data tensor.
+    :param axes: Optional list of integers, indicating the dimensions to squeeze.
+                 Negative indices are supported. One of: input node or array.
+    :param allow_axis_skip: If true, shape inference results in a dynamic rank, when
+                            selected axis has value 1 in its dynamic range. Used only if axes input
+                            is given. Defaults to false.
+    :param name: Optional new name for output node.
+    :return: The new node performing a squeeze operation on input tensor.
+
+    Remove single-dimensional entries from the shape of a tensor.
+    Takes an optional parameter `axes` with a list of axes to squeeze.
+    If `axes` is not provided, all the single dimensions will be removed from the shape.
+
+    For example:
+
+       Inputs: tensor with shape [1, 2, 1, 3, 1, 1], axes=[2, 4]
+
+       Result: tensor with shape [1, 2, 3, 1]
+    """
+    if axes is None:
+        inputs = as_nodes(data, name=name)
+    else:
+        inputs = as_nodes(data, axes, name=name)
+    return _get_node_factory_opset15().create(
+        "Squeeze",
+        inputs,
+        {"allow_axis_skip": allow_axis_skip}
+    )
diff --git a/src/bindings/python/src/openvino/runtime/opset16/__init__.py b/src/bindings/python/src/openvino/runtime/opset16/__init__.py
new file mode 100644
index 00000000000000..ce52690e919fc3
--- /dev/null
+++ b/src/bindings/python/src/openvino/runtime/opset16/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# New operations added in Opset16
+from openvino.runtime.opset16.ops import identity
+
+# Operators from previous opsets
+# TODO (ticket: 156877): Add previous opset operators at the end of opset16 development
diff --git a/src/bindings/python/src/openvino/runtime/opset16/ops.py b/src/bindings/python/src/openvino/runtime/opset16/ops.py
new file mode 100644
index 00000000000000..60656f6d993b6a
--- /dev/null
+++ b/src/bindings/python/src/openvino/runtime/opset16/ops.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Factory functions for ops added to openvino opset16."""
+from functools import partial
+from typing import Optional
+
+from openvino.runtime import Node
+from openvino.runtime.utils.decorators import nameable_op
+from openvino.runtime.opset_utils import _get_node_factory
+from openvino.runtime.utils.types import NodeInput, as_nodes
+
+_get_node_factory_opset16 = partial(_get_node_factory, "opset16")
+
+# -------------------------------------------- ops ------------------------------------------------
+
+
+@nameable_op
+def identity(
+    data: NodeInput,
+    name: Optional[str] = None,
+) -> Node:
+    """Identity operation is used as a placeholder. It creates a copy of the input to forward to the output.
+
+    :param data: Tensor with data.
+    :param name: The optional name for the created output node.
+    :return: The new node performing Identity operation.
+    """
+    return _get_node_factory_opset16().create(
+        "Identity",
+        as_nodes(data, name=name),
+        {},
+    )
diff --git a/src/bindings/python/src/openvino/runtime/opset2/__init__.py b/src/bindings/python/src/openvino/runtime/opset2/__init__.py
index 34d0d9b6737709..6624149e157e9e 100644
--- a/src/bindings/python/src/openvino/runtime/opset2/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/opset2/__init__.py
@@ -56,7 +56,6 @@
 from openvino.runtime.opset1.ops import logical_xor
 from openvino.runtime.opset1.ops import lrn
 from openvino.runtime.opset1.ops import lstm_cell
-from openvino.runtime.opset1.ops import lstm_sequence
 from openvino.runtime.opset1.ops import matmul
 from openvino.runtime.opset1.ops import max_pool
 from openvino.runtime.opset1.ops import maximum
diff --git a/src/bindings/python/src/openvino/runtime/opset3/__init__.py b/src/bindings/python/src/openvino/runtime/opset3/__init__.py
index 964acb15b34bbc..5bd68912cae807 100644
--- a/src/bindings/python/src/openvino/runtime/opset3/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/opset3/__init__.py
@@ -65,7 +65,6 @@
 from openvino.runtime.opset1.ops import logical_xor
 from openvino.runtime.opset1.ops import lrn
 from openvino.runtime.opset1.ops import lstm_cell
-from openvino.runtime.opset1.ops import lstm_sequence
 from openvino.runtime.opset1.ops import matmul
 from openvino.runtime.opset1.ops import max_pool
 from openvino.runtime.opset1.ops import maximum
diff --git a/src/bindings/python/src/openvino/runtime/opset4/__init__.py b/src/bindings/python/src/openvino/runtime/opset4/__init__.py
index bf57172bed40e2..e7cef8eb216d17 100644
--- a/src/bindings/python/src/openvino/runtime/opset4/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/opset4/__init__.py
@@ -70,7 +70,6 @@
 from openvino.runtime.opset1.ops import logical_xor
 from
openvino.runtime.opset1.ops import lrn
 from openvino.runtime.opset4.ops import lstm_cell
-from openvino.runtime.opset1.ops import lstm_sequence
 from openvino.runtime.opset1.ops import matmul
 from openvino.runtime.opset1.ops import max_pool
 from openvino.runtime.opset1.ops import maximum
diff --git a/src/bindings/python/src/openvino/runtime/properties/__init__.py b/src/bindings/python/src/openvino/runtime/properties/__init__.py
index caaa93f37223b0..3269ea42e32ac2 100644
--- a/src/bindings/python/src/openvino/runtime/properties/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/__init__.py
@@ -29,6 +29,7 @@
 from openvino._pyopenvino.properties import execution_devices
 from openvino._pyopenvino.properties import loaded_from_cache
 from openvino._pyopenvino.properties import cache_encryption_callbacks
+from openvino._pyopenvino.properties import weights_path
 
 # Submodules
 from openvino.runtime.properties import hint
diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
index dd90ded374ca11..d1dce289d09941 100644
--- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
@@ -23,3 +23,4 @@
 from openvino._pyopenvino.properties.hint import allow_auto_batching
 from openvino._pyopenvino.properties.hint import dynamic_quantization_group_size
 from openvino._pyopenvino.properties.hint import kv_cache_precision
+from openvino._pyopenvino.properties.hint import activations_scale_factor
diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp
index a202c3a3801001..10ae0ed0ea6042 100644
--- a/src/bindings/python/src/pyopenvino/core/common.cpp
+++ b/src/bindings/python/src/pyopenvino/core/common.cpp
@@ -358,10 +358,14 @@ py::array array_from_constant_copy(ov::op::v0::Constant&& c, py::dtype& dst_dtyp
 py::array array_from_constant_view(ov::op::v0::Constant&& c) {
     const auto& ov_type = c.get_element_type();
     const auto dtype = Common::type_helpers::get_dtype(ov_type);
+    py::array data;
     if (ov_type.bitwidth() < Common::values::min_bitwidth) {
-        return py::array(dtype, c.get_byte_size(), c.get_data_ptr(), py::cast(c));
+        data = py::array(dtype, c.get_byte_size(), c.get_data_ptr(), py::cast(c));
+    } else {
+        data = py::array(dtype, c.get_shape(), constant_helpers::_get_strides(c), c.get_data_ptr(), py::cast(c));
     }
-    return py::array(dtype, c.get_shape(), constant_helpers::_get_strides(c), c.get_data_ptr(), py::cast(c));
+    data.attr("flags").attr("writeable") = false;  // hand the view out read-only so Python cannot write through it
+    return data;
 }
 
 };  // namespace array_helpers
diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp
index 6cf405cd167423..68e3e5cc4841ed 100644
--- a/src/bindings/python/src/pyopenvino/core/core.cpp
+++ b/src/bindings/python/src/pyopenvino/core/core.cpp
@@ -496,50 +496,6 @@ void regclass_Core(py::module m) {
                 :rtype: openvino.runtime.Model
             )");
 
-    cls.def(
-        "import_model",
-        [](ov::Core& self,
-           const std::string& model_stream,
-           const std::string& device_name,
-           const std::map& properties) {
-            auto _properties = Common::utils::properties_to_any_map(properties);
-            py::gil_scoped_release release;
-            std::stringstream _stream;
-            _stream << model_stream;
-            return self.import_model(_stream, device_name, _properties);
-        },
-        py::arg("model_stream"),
-        py::arg("device_name"),
-        py::arg("properties"),
-        R"(
-            Imports a compiled model from a previously exported one.
-
-            GIL is released while running this function.
-
-            :param model_stream: Input stream, containing a model previously exported, using export_model method.
-            :type model_stream: bytes
-            :param device_name: Name of device to which compiled model is imported.
-                                Note: if device_name is not used to compile the original model, an exception is thrown.
- :type device_name: str - :param properties: Optional map of pairs: (property name, property value) relevant only for this load operation. - :type properties: dict, optional - :return: A compiled model. - :rtype: openvino.runtime.CompiledModel - - :Example: - .. code-block:: python - - user_stream = compiled.export_model() - - with open('./my_model', 'wb') as f: - f.write(user_stream) - - # ... - - new_compiled = core.import_model(user_stream, "CPU") - )"); - - // keep as second one to solve overload resolution problem cls.def( "import_model", [](ov::Core& self, @@ -547,46 +503,26 @@ void regclass_Core(py::module m) { const std::string& device_name, const std::map& properties) { const auto _properties = Common::utils::properties_to_any_map(properties); - if (!(py::isinstance(model_stream, pybind11::module::import("io").attr("BytesIO")))) { + if (!(py::isinstance(model_stream, pybind11::module::import("io").attr("BytesIO"))) && + !py::isinstance(model_stream)) { throw py::type_error("CompiledModel.import_model(model_stream) incompatible function argument: " - "`model_stream` must be an io.BytesIO object but " + + "`model_stream` must be an io.BytesIO object or bytes but " + (std::string)(py::repr(model_stream)) + "` provided"); } - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> distr(1000, 9999); - std::string filename = "model_stream_" + std::to_string(distr(gen)) + ".txt"; - std::fstream _stream(filename, std::ios::out | std::ios::binary); - model_stream.attr("seek")(0); // Always rewind stream! 
- if (_stream.is_open()) { - const py::bytes data = model_stream.attr("read")(); - // convert the Python bytes object to C++ string - char* buffer; - Py_ssize_t length; - PYBIND11_BYTES_AS_STRING_AND_SIZE(data.ptr(), &buffer, &length); - _stream.write(buffer, length); - _stream.close(); - } else { - OPENVINO_THROW("Failed to open temporary file for model stream"); - } + py::buffer_info info; - ov::CompiledModel result; - std::fstream _fstream(filename, std::ios::in | std::ios::binary); - if (_fstream.is_open()) { - py::gil_scoped_release release; - result = self.import_model(_fstream, device_name, _properties); - _fstream.close(); - if (std::remove(filename.c_str()) != 0) { - const std::string abs_path = - py::module_::import("os").attr("getcwd")().cast() + "/" + filename; - const std::string warning_message = "Temporary file " + abs_path + " failed to delete!"; - PyErr_WarnEx(PyExc_RuntimeWarning, warning_message.c_str(), 1); - } + if (py::isinstance(model_stream, pybind11::module::import("io").attr("BytesIO"))) { + model_stream.attr("seek")(0); + info = py::buffer(model_stream.attr("getbuffer")()).request(); } else { - OPENVINO_THROW("Failed to open temporary file for model stream"); + info = py::buffer(model_stream).request(); } - return result; + Common::utils::MemoryBuffer mb(reinterpret_cast(info.ptr), info.size); + std::istream stream(&mb); + + py::gil_scoped_release release; + return self.import_model(stream, device_name, _properties); }, py::arg("model_stream"), py::arg("device_name"), @@ -601,7 +537,7 @@ void regclass_Core(py::module m) { :param model_stream: Input stream, containing a model previously exported, using export_model method. - :type model_stream: io.BytesIO + :type model_stream: Union[io.BytesIO, bytes] :param device_name: Name of device to which compiled model is imported. Note: if device_name is not used to compile the original model, an exception is thrown. 
:type device_name: str diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 470161d9779558..564e5f69f5ee14 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -43,6 +43,7 @@ void regmodule_properties(py::module m) { OPENVINO_SUPPRESS_DEPRECATED_END wrap_property_RW(m_properties, ov::force_tbb_terminate, "force_tbb_terminate"); wrap_property_RW(m_properties, ov::enable_mmap, "enable_mmap"); + wrap_property_RW(m_properties, ov::weights_path, "weights_path"); wrap_property_RO(m_properties, ov::supported_properties, "supported_properties"); wrap_property_RO(m_properties, ov::available_devices, "available_devices"); @@ -100,6 +101,7 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_hint, ov::hint::allow_auto_batching, "allow_auto_batching"); wrap_property_RW(m_hint, ov::hint::dynamic_quantization_group_size, "dynamic_quantization_group_size"); wrap_property_RW(m_hint, ov::hint::kv_cache_precision, "kv_cache_precision"); + wrap_property_RW(m_hint, ov::hint::activations_scale_factor, "activations_scale_factor"); // Submodule intel_cpu py::module m_intel_cpu = diff --git a/src/bindings/python/src/pyopenvino/frontend/frontend.cpp b/src/bindings/python/src/pyopenvino/frontend/frontend.cpp index afc9e0af361c52..758fb505f5f885 100644 --- a/src/bindings/python/src/pyopenvino/frontend/frontend.cpp +++ b/src/bindings/python/src/pyopenvino/frontend/frontend.cpp @@ -20,13 +20,6 @@ namespace py = pybind11; using namespace ov::frontend; -class MemoryBuffer : public std::streambuf { -public: - MemoryBuffer(char* data, std::size_t size) { - setg(data, data, data + size); - } -}; - void regclass_frontend_FrontEnd(py::module m) { py::class_> fem(m, "FrontEnd", py::dynamic_attr(), py::module_local()); fem.doc() = "openvino.frontend.FrontEnd wraps ov::frontend::FrontEnd"; @@ 
-57,7 +50,7 @@ void regclass_frontend_FrontEnd(py::module m) { } else if (py::isinstance(py_obj, pybind11::module::import("io").attr("BytesIO"))) { // support of BytesIO py::buffer_info info = py::buffer(py_obj.attr("getbuffer")()).request(); - MemoryBuffer mb(reinterpret_cast(info.ptr), info.size); + Common::utils::MemoryBuffer mb(reinterpret_cast(info.ptr), info.size); std::istream _istream(&mb); return self.load(&_istream, enable_mmap); } else { diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index e165c2e00b4808..c747e2d3b81166 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -258,20 +258,31 @@ std::map properties_to_any_map(const std::map(property_value)) { OPENVINO_THROW("The value type of ov::cache_encryption_callbacks property is expected list"); } + auto property_list = property_value.cast(); + // Wrapped to sp due-to we need to hold GIL upon destruction of python function + auto py_encrypt = std::shared_ptr(new py::function(std::move(property_list[0])), + [](py::function* py_encrypt) { + py::gil_scoped_acquire acquire; + delete py_encrypt; + }); + auto py_decrypt = std::shared_ptr(new py::function(std::move(property_list[1])), + [](py::function* py_decrypt) { + py::gil_scoped_acquire acquire; + delete py_decrypt; + }); + std::function encrypt_func = - [property_value](const std::string& in_str) -> std::string { + [py_encrypt](const std::string& in_str) -> std::string { // Acquire GIL, execute Python function py::gil_scoped_acquire acquire; - auto _list = property_value.cast(); - return _list[0](in_str).cast(); + return (*py_encrypt)(in_str).cast(); }; std::function decrypt_func = - [property_value](const std::string& in_str) -> std::string { + [py_decrypt](const std::string& in_str) -> std::string { // Acquire GIL, execute Python function py::gil_scoped_acquire acquire; - auto _list = property_value.cast(); - 
return _list[1](in_str).cast();
+                return (*py_decrypt)(in_str).cast();
             };
         ov::EncryptionCallbacks encryption_callbacks{encrypt_func, decrypt_func};
         properties_to_cpp[property.first] = encryption_callbacks;
diff --git a/src/bindings/python/src/pyopenvino/utils/utils.hpp b/src/bindings/python/src/pyopenvino/utils/utils.hpp
index b59ffe530f6045..2a7b6505269535 100644
--- a/src/bindings/python/src/pyopenvino/utils/utils.hpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.hpp
@@ -32,6 +32,56 @@ namespace py = pybind11;
 namespace Common {
 namespace utils {
 
+// Read-only std::streambuf over a caller-owned memory range; nothing is copied, so the
+// range must stay valid for as long as the buffer (and any stream using it) is alive.
+class MemoryBuffer : public std::streambuf {
+public:
+    MemoryBuffer(char* data, std::size_t size) {
+        setg(data, data, data + size);
+    }
+
+protected:
+    // Moves the read position. Returns the new offset from the start of the range, or
+    // pos_type(off_type(-1)) when the request is invalid; a failed seek leaves the
+    // current position untouched.
+    pos_type seekoff(off_type off,
+                     std::ios_base::seekdir dir,
+                     std::ios_base::openmode which = std::ios_base::in) override {
+        const pos_type failure = pos_type(off_type(-1));
+        // Only a get area is configured, so a seek must address the input sequence.
+        if ((which & std::ios_base::in) != std::ios_base::in) {
+            return failure;
+        }
+        const off_type size = egptr() - eback();
+        off_type base = 0;
+        switch (dir) {
+        case std::ios_base::beg:
+            break;
+        case std::ios_base::end:
+            base = size;
+            break;
+        case std::ios_base::cur:
+            base = gptr() - eback();
+            break;
+        default:
+            return failure;
+        }
+        // Validate before calling setg(): forming a pointer outside [eback(), egptr()] is
+        // undefined behaviour and would let later reads run outside the range. The bounds
+        // are compared against base so that base + off cannot overflow.
+        if (off < -base || off > size - base) {
+            return failure;
+        }
+        const off_type target = base + off;
+        setg(eback(), eback() + target, egptr());
+        return pos_type(target);
+    }
+
+    pos_type seekpos(pos_type pos, std::ios_base::openmode which) override {
+        return seekoff(off_type(pos), std::ios_base::beg, which);
+    }
+};
+
 enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL, PARTIAL_SHAPE };
 
 struct EmptyList {};
diff --git a/src/bindings/python/tests/test_graph/test_constant.py b/src/bindings/python/tests/test_graph/test_constant.py
index e28a4ad05510f2..7b349ad7cd94b1 100644
--- a/src/bindings/python/tests/test_graph/test_constant.py
+++ b/src/bindings/python/tests/test_graph/test_constant.py
@@ -87,7 +87,7 @@ def test_init_with_array(src_dtype, dst_dtype, shared_flag, data_getter):
     data = np.ascontiguousarray(data)
 
     # Create constant from based on numpy dtype or openvino type
-    ov_const = ops.constant(data, dtype=dst_dtype, shared_memory=shared_flag)
+    ov_const = ops.constant(data, dst_dtype, shared_memory=shared_flag)
 
     # Check shape and element type of Constant class
     assert isinstance(ov_const, Constant)
@@ -205,53 +205,12 @@ def test_init_with_scalar(init_value, src_dtype, dst_dtype, shared_flag, data_ge
     assert np.allclose(const_data, expected_result)
 
 
-@pytest.mark.parametrize(
-    ("src_dtype"),
-    [
-        (np.float16),
-        (np.uint16),
-    ],
-)
-@pytest.mark.parametrize(
-    ("shared_flag"),
-    [
-        (True),
-        (False),
-    ],
-)
-@pytest.mark.parametrize(
-    ("data_getter"),
-    [
-        (DataGetter.COPY),
-        (DataGetter.VIEW),
-    ],
-)
-def test_init_bf16_populate(src_dtype, shared_flag, data_getter):
-    data = np.random.rand(1, 2, 16, 8) + 0.5
-    data = data.astype(src_dtype)
-
-    # To create bf16 constant, allocate memory and populate it:
-    init_data = np.zeros(shape=data.shape, dtype=src_dtype)
-    ov_const = ops.constant(init_data, dtype=Type.bf16, shared_memory=shared_flag)
-    ov_const.data[:] = data
-
-    # Check shape and element type of Constant class
-    assert isinstance(ov_const, Constant)
-    assert np.all(list(ov_const.shape) == [1, 2, 16, 8])
-    assert ov_const.get_element_type() == Type.bf16
-
_dst_dtype = Type.bf16.to_dtype()
-
-    assert ov_const.get_element_type().to_dtype() == _dst_dtype
-    # Compare values to Constant
-    if data_getter == DataGetter.COPY:
-        const_data = ov_const.get_data()
-    elif data_getter == DataGetter.VIEW:
-        const_data = ov_const.data
-    else:
-        raise AttributeError("Unknown DataGetter passed!")
-    assert const_data.dtype == _dst_dtype
-    assert np.allclose(const_data, data)
+def test_cant_change_data_in_const():
+    arr_0 = np.ones([1, 3, 32, 32])
+    ov_const = ops.constant(arr_0)
+    arr_1 = np.ones([1, 3, 32, 32]) + 1
+    with pytest.raises(ValueError, match="assignment destination is read-only"):
+        ov_const.data[:] = arr_1  # writing through the Constant's data view must be rejected
 
 
 @pytest.mark.parametrize(
@@ -286,58 +245,6 @@ def test_init_bf16_direct(ov_type, numpy_dtype, shared_flag):
     assert np.allclose(data, result, rtol=0.01)
 
 
-@pytest.mark.parametrize(
-    "shape",
-    [
-        ([1, 3, 28, 28]),
-        ([1, 3, 27, 27]),
-    ],
-)
-@pytest.mark.parametrize(
-    ("low", "high", "ov_type", "src_dtype"),
-    [
-        (0, 2, Type.u1, np.uint8),
-        (0, 16, Type.u4, np.uint8),
-        (-8, 7, Type.i4, np.int8),
-        (0, 16, Type.nf4, np.uint8),
-    ],
-)
-@pytest.mark.parametrize(
-    ("shared_flag"),
-    [
-        (True),
-        (False),
-    ],
-)
-@pytest.mark.parametrize(
-    ("data_getter"),
-    [
-        (DataGetter.COPY),
-        (DataGetter.VIEW),
-    ],
-)
-def test_constant_helper_packing(shape, low, high, ov_type, src_dtype, shared_flag, data_getter):
-    data = np.random.uniform(low, high, shape).astype(src_dtype)
-
-    # Allocate memory first:
-    ov_const = ops.constant(np.zeros(shape=data.shape, dtype=src_dtype),
-                            dtype=ov_type,
-                            shared_memory=shared_flag)
-    # Fill data with packed values
-    packed_data = pack_data(data, ov_const.get_element_type())
-    ov_const.data[:] = packed_data
-
-    # Always unpack the data!
- if data_getter == DataGetter.COPY: - unpacked = unpack_data(ov_const.get_data(), ov_const.get_element_type(), ov_const.shape) - elif data_getter == DataGetter.VIEW: - unpacked = unpack_data(ov_const.data, ov_const.get_element_type(), ov_const.shape) - else: - raise AttributeError("Unknown DataGetter passed!") - - assert np.array_equal(unpacked, data) - - @pytest.mark.parametrize( ("ov_type", "src_dtype"), [ @@ -380,21 +287,6 @@ def test_constant_direct_packing(ov_type, src_dtype, shared_flag, data_getter): assert not np.shares_memory(unpacked, data) -@pytest.mark.parametrize( - ("shared_flag"), - [ - (True), - (False), - ], -) -def test_write_to_buffer(shared_flag): - arr_0 = np.ones([1, 3, 32, 32]) - ov_const = ops.constant(arr_0, shared_memory=shared_flag) - arr_1 = np.ones([1, 3, 32, 32]) + 1 - ov_const.data[:] = arr_1 - assert np.array_equal(ov_const.data, arr_1) - - @pytest.mark.parametrize( ("shared_flag"), [ @@ -842,7 +734,7 @@ def test_get_data_casting_bf16(src_dtype, dst_dtype, copy_flag): ) def test_get_data_casting_packed(src_dtype, ov_type, dst_dtype, copy_flag): data = np.array([[0, 0, 0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 1]], dtype=src_dtype) - ov_const = ops.constant(data, dtype=ov_type) + ov_const = ops.constant(value=data, dtype=ov_type) arr = ov_const.get_data(dtype=dst_dtype, copy=copy_flag) if dst_dtype is None: @@ -867,7 +759,7 @@ def test_const_from_tensor(shared_flag): shape = [1, 3, 32, 32] arr = np.ones(shape).astype(np.float32) ov_tensor = Tensor(arr, shape, Type.f32) - ov_const = ops.constant(ov_tensor, shared_memory=shared_flag) + ov_const = ops.constant(tensor=ov_tensor, shared_memory=shared_flag) assert isinstance(ov_const, Constant) assert np.all(list(ov_const.shape) == shape) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index c5023588f5d55b..f86ea4a18a8ca1 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ 
b/src/bindings/python/tests/test_graph/test_create_op.py @@ -315,264 +315,6 @@ def test_lstm_cell_operator(dtype): assert list(node_param.get_output_shape(1)) == [1, 128] -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("op_name", ["lstm", "lstmOpset1"]) -def test_lstm_cell_operator_opset1(dtype, op_name): - batch_size = 1 - input_size = 16 - hidden_size = 128 - - x_shape = [batch_size, input_size] - h_t_shape = [batch_size, hidden_size] - c_t_shape = [batch_size, hidden_size] - w_shape = [4 * hidden_size, input_size] - r_shape = [4 * hidden_size, hidden_size] - b_shape = [4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - node_default = ov_opset1.lstm_cell( - parameter_x, parameter_h_t, parameter_c_t, parameter_w, parameter_r, parameter_b, hidden_size, name=op_name, - ) - - assert node_default.get_type_name() == "LSTMCell" - assert node_default.get_friendly_name() == op_name - assert node_default.get_output_size() == 2 - assert list(node_default.get_output_shape(0)) == [1, 128] - assert list(node_default.get_output_shape(1)) == [1, 128] - - activations = ["tanh", "Sigmoid", "RELU"] - activation_alpha = [1.0, 2.0, 3.0] - activation_beta = [3.0, 2.0, 1.0] - clip = 0.5 - - node_param = ov_opset1.lstm_cell( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node_param.get_type_name() == "LSTMCell" - assert node_param.get_output_size() == 2 - assert list(node_param.get_output_shape(0)) == [1, 128] - assert list(node_param.get_output_shape(1)) == [1, 128] - 
- -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("op_name", ["lstm", "lstmOpset1"]) -def test_lstm_sequence_operator_bidirectional_opset1(dtype, op_name): - batch_size = 1 - input_size = 16 - hidden_size = 128 - num_directions = 2 - seq_length = 2 - - x_shape = [batch_size, seq_length, input_size] - h_t_shape = [batch_size, num_directions, hidden_size] - c_t_shape = [batch_size, num_directions, hidden_size] - seq_len_shape = [batch_size] - w_shape = [num_directions, 4 * hidden_size, input_size] - r_shape = [num_directions, 4 * hidden_size, hidden_size] - b_shape = [num_directions, 4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_seq_len = ov.parameter(seq_len_shape, name="seq_len", dtype=np.int32) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - direction = "BIDIRECTIONAL" - with pytest.warns(DeprecationWarning): - node = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - name=op_name, - ) - - assert node.get_type_name() == "LSTMSequence" - assert node.get_friendly_name() == op_name - assert node.get_output_size() == 3 - - activations = ["RELU", "tanh", "Sigmoid"] - activation_alpha = [1.0, 2.0, 3.0] - activation_beta = [3.0, 2.0, 1.0] - clip = 1.22 - - with pytest.warns(DeprecationWarning): - node_param = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node_param.get_type_name() == "LSTMSequence" - assert 
node_param.get_output_size() == 3 - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_lstm_sequence_operator_reverse_opset1(dtype): - batch_size = 2 - input_size = 4 - hidden_size = 3 - num_directions = 1 - seq_length = 2 - - x_shape = [batch_size, seq_length, input_size] - h_t_shape = [batch_size, num_directions, hidden_size] - c_t_shape = [batch_size, num_directions, hidden_size] - seq_len_shape = [batch_size] - w_shape = [num_directions, 4 * hidden_size, input_size] - r_shape = [num_directions, 4 * hidden_size, hidden_size] - b_shape = [num_directions, 4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_seq_len = ov.parameter(seq_len_shape, name="seq_len", dtype=np.int32) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - direction = "REVERSE" - with pytest.warns(DeprecationWarning): - node_default = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - ) - - assert node_default.get_type_name() == "LSTMSequence" - assert node_default.get_output_size() == 3 - - activations = ["RELU", "tanh", "Sigmoid"] - activation_alpha = [1.0, 2.0, 3.0] - activation_beta = [3.0, 2.0, 1.0] - clip = 1.22 - with pytest.warns(DeprecationWarning): - node_param = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node_param.get_type_name() == "LSTMSequence" - assert node_param.get_output_size() == 3 - - -@pytest.mark.parametrize("dtype", [np.float32, 
np.float64]) -def test_lstm_sequence_operator_forward_opset1(dtype): - batch_size = 2 - input_size = 4 - hidden_size = 3 - num_directions = 1 - seq_length = 2 - - x_shape = [batch_size, seq_length, input_size] - h_t_shape = [batch_size, num_directions, hidden_size] - c_t_shape = [batch_size, num_directions, hidden_size] - seq_len_shape = [batch_size] - w_shape = [num_directions, 4 * hidden_size, input_size] - r_shape = [num_directions, 4 * hidden_size, hidden_size] - b_shape = [num_directions, 4 * hidden_size] - - parameter_x = ov.parameter(x_shape, name="X", dtype=dtype) - parameter_h_t = ov.parameter(h_t_shape, name="H_t", dtype=dtype) - parameter_c_t = ov.parameter(c_t_shape, name="C_t", dtype=dtype) - parameter_seq_len = ov.parameter(seq_len_shape, name="seq_len", dtype=np.int32) - parameter_w = ov.parameter(w_shape, name="W", dtype=dtype) - parameter_r = ov.parameter(r_shape, name="R", dtype=dtype) - parameter_b = ov.parameter(b_shape, name="B", dtype=dtype) - - direction = "forward" - with pytest.warns(DeprecationWarning): - node_default = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - ) - - assert node_default.get_type_name() == "LSTMSequence" - assert node_default.get_output_size() == 3 - - activations = ["RELU", "tanh", "Sigmoid"] - activation_alpha = [2.0] - activation_beta = [1.0] - clip = 0.5 - with pytest.warns(DeprecationWarning): - node = ov_opset1.lstm_sequence( - parameter_x, - parameter_h_t, - parameter_c_t, - parameter_seq_len, - parameter_w, - parameter_r, - parameter_b, - hidden_size, - direction, - activations, - activation_alpha, - activation_beta, - clip, - ) - - assert node.get_type_name() == "LSTMSequence" - assert node.get_output_size() == 3 - - def test_gru_cell_operator(): batch_size = 1 input_size = 16 @@ -2486,6 +2228,58 @@ def test_slice_scatter(): assert node_default_axes.get_output_shape(0) == data_shape +def 
test_stft(): + data_shape = [4, 48] + data = ov.parameter(data_shape, name="input", dtype=np.float32) + window = ov.parameter([7], name="window", dtype=np.float32) + frame_size = ov.constant(np.array(11, dtype=np.int32)) + frame_step = ov.constant(np.array(3, dtype=np.int32)) + + transpose_frames = False + op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) + + assert op.get_type_name() == "STFT" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.f32 + assert op.get_output_shape(0) == [4, 13, 6, 2] + + transpose_frames = True + op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) + + assert op.get_type_name() == "STFT" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.f32 + assert op.get_output_shape(0) == [4, 6, 13, 2] + + +def test_search_sorted(): + sorted_sequence = ov.parameter([7, 256, 200, 200], name="sorted", dtype=np.float32) + values = ov.parameter([7, 256, 200, 10], name="values", dtype=np.float32) + op = ov_opset15.search_sorted(sorted_sequence=sorted_sequence, values=values, name="default") + assert op.get_type_name() == "SearchSorted" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.i64 + assert op.get_output_shape(0) == [7, 256, 200, 10] + assert op.get_attributes()["right_mode"] is False + assert op.get_friendly_name() == "default" + + op = ov_opset15.search_sorted(sorted_sequence, values, right_mode=True, name="right") + assert op.get_type_name() == "SearchSorted" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.i64 + assert op.get_output_shape(0) == [7, 256, 200, 10] + assert op.get_attributes()["right_mode"] is True + assert op.get_friendly_name() == "right" + + op = ov_opset15.search_sorted(sorted_sequence, values, False, name="left") + assert op.get_type_name() == "SearchSorted" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.i64 + 
assert op.get_output_shape(0) == [7, 256, 200, 10] + assert op.get_attributes()["right_mode"] is False + assert op.get_friendly_name() == "left" + + def test_parameter_get_attributes(): parameter = ov.parameter([2, 2], dtype=np.float32, name="InputData") parameter_attributes = parameter.get_attributes() diff --git a/src/bindings/python/tests/test_graph/test_custom_op.py b/src/bindings/python/tests/test_graph/test_custom_op.py index 5a2e33c7ee5783..8643844e2c54fd 100644 --- a/src/bindings/python/tests/test_graph/test_custom_op.py +++ b/src/bindings/python/tests/test_graph/test_custom_op.py @@ -12,7 +12,7 @@ from openvino.runtime import DiscreteTypeInfo import openvino.runtime.opset14 as ops -from tests.utils.helpers import create_filename_for_test +from tests.utils.helpers import create_filenames_for_ir class CustomOp(Op): @@ -108,7 +108,7 @@ def visit_attributes(self, visitor): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request @pytest.fixture def prepared_paths(request, tmp_path): - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) yield xml_path, bin_path diff --git a/src/bindings/python/tests/test_graph/test_identity.py b/src/bindings/python/tests/test_graph/test_identity.py new file mode 100644 index 00000000000000..fea1c88858c874 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_identity.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from openvino.runtime.opset15 import parameter +from openvino.runtime.opset16 import identity +from openvino import PartialShape, Type + + +@pytest.mark.parametrize( + ("input_shape", "expected_output_shape"), + [ + ([4, 4], PartialShape([4, 4])), + ([10, 8, 8], PartialShape([10, 8, 8])), + ([-1, -1, -1], PartialShape([-1, -1, -1])), + ([10, -1, -1], PartialShape([10, -1, 
-1])), + ], +) +@pytest.mark.parametrize("op_name", ["identity", "identityOpset16"]) +def test_inverse_param_inputs(input_shape, expected_output_shape, op_name): + data = parameter(input_shape, dtype=np.float32) + + op = identity(data, name=op_name) + assert op.get_output_size() == 1 + assert op.get_type_name() == "Identity" + assert op.get_friendly_name() == op_name + assert op.get_output_element_type(0) == Type.f32 + assert op.get_output_partial_shape(0) == expected_output_shape diff --git a/src/bindings/python/tests/test_graph/test_manager.py b/src/bindings/python/tests/test_graph/test_manager.py index b5fce8cc09b8d7..ff72ef43158d6e 100644 --- a/src/bindings/python/tests/test_graph/test_manager.py +++ b/src/bindings/python/tests/test_graph/test_manager.py @@ -12,7 +12,7 @@ from openvino.runtime.passes import Manager, Serialize, ConstantFolding, Version from tests.test_graph.util import count_ops_of_type -from tests.utils.helpers import create_filename_for_test, compare_models +from tests.utils.helpers import create_filenames_for_ir, compare_models def create_model(): @@ -51,7 +51,7 @@ def test_constant_folding(): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request @pytest.fixture def prepare_ir_paths(request, tmp_path): - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) yield xml_path, bin_path @@ -138,7 +138,7 @@ def test_serialize_pass_mixed_args_kwargs_v2(prepare_ir_paths): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_serialize_pass_wrong_num_of_args(request, tmp_path): - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) pass_manager = Manager() with pytest.raises(TypeError) as e: diff --git a/src/bindings/python/tests/test_graph/test_ops_fused.py 
b/src/bindings/python/tests/test_graph/test_ops_fused.py index bdbf4a1a9f1f9c..2bab743bfd7afb 100644 --- a/src/bindings/python/tests/test_graph/test_ops_fused.py +++ b/src/bindings/python/tests/test_graph/test_ops_fused.py @@ -110,17 +110,6 @@ def test_clamp_operator(): assert list(model.get_output_shape(0)) == [2, 2] -def test_squeeze_operator(): - data_shape = [1, 2, 1, 3, 1, 1] - parameter_data = ov.parameter(data_shape, name="Data", dtype=np.float32) - axes = [2, 4] - model = ov.squeeze(parameter_data, axes) - - assert model.get_type_name() == "Squeeze" - assert model.get_output_size() == 1 - assert list(model.get_output_shape(0)) == [1, 2, 3, 1] - - def test_squared_difference_operator(): x1_shape = [1, 2, 3, 4] x2_shape = [2, 3, 4] diff --git a/src/bindings/python/tests/test_graph/test_squeeze.py b/src/bindings/python/tests/test_graph/test_squeeze.py new file mode 100644 index 00000000000000..869d84a0414841 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_squeeze.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import openvino.runtime.opset1 as ov_opset1 +import openvino.runtime.opset15 as ov_opset15 +import numpy as np +import pytest + + +def test_squeeze_v1_operator(): + data_shape = [1, 2, 1, 3, 1, 1] + parameter_data = ov_opset1.parameter(data_shape, name="Data", dtype=np.float32) + axes = [2, 4] + model = ov_opset1.squeeze(parameter_data, axes) + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [1, 2, 3, 1] + + +@pytest.mark.parametrize(("input_shape", "axes", "allow_axis_skip", "expected_shape"), [ + ((1, 2, 1, 3, 1, 1), [1, 2, 4], True, [1, 2, 3, 1]), + ((1, 2, 1, 3, 1, 1), [1, 2, 4], False, [1, 2, 3, 1]), + ((2, -1, 3), [1], False, [2, 3]) +]) +def test_squeeze_v15_operator(input_shape, axes, allow_axis_skip, expected_shape): + parameter_data = ov_opset15.parameter(input_shape, 
name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, axes, allow_axis_skip, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == expected_shape + + +def test_squeeze_v15_dynamic_rank_output(): + parameter_data = ov_opset15.parameter((2, -1, 3), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, [1], True, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert model.get_output_partial_shape(0).to_string() == "[...]" + + +def test_squeeze_v15_axes_not_given(): + parameter_data = ov_opset15.parameter((1, 3, 1, 1, 3, 5), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(data=parameter_data, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [3, 3, 5] diff --git a/src/bindings/python/tests/test_runtime/test_compiled_model.py b/src/bindings/python/tests/test_runtime/test_compiled_model.py index d6909fa94711d3..9e17497fc22081 100644 --- a/src/bindings/python/tests/test_runtime/test_compiled_model.py +++ b/src/bindings/python/tests/test_runtime/test_compiled_model.py @@ -2,6 +2,7 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os import pytest import numpy as np @@ -14,6 +15,7 @@ generate_relu_compiled_model_with_config, encrypt_base64, decrypt_base64, + create_filenames_for_ir, create_filename_for_test) from openvino import Model, Shape, Core, Tensor, serialize from openvino.runtime import ConstOutput @@ -97,6 +99,43 @@ def test_export_import_advanced(device): assert np.argmax(res[new_compiled.outputs[0]]) == 531 +# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request +@pytest.fixture +def prepare_blob_path(request, tmp_path): + filename = create_filename_for_test(request.node.name) + path_to_blob = tmp_path / 
str(filename + ".blob") + yield path_to_blob + + os.remove(path_to_blob) + + +def test_export_import_via_file(prepare_blob_path, device): + import io + + core = Core() + + if props.device.Capability.EXPORT_IMPORT not in core.get_property(device, props.device.capabilities): + pytest.skip(f"{core.get_property(device, props.device.full_name)} plugin due-to export, import model API isn't implemented.") + + compiled_model = generate_relu_compiled_model(device) + + user_stream = io.BytesIO() + + compiled_model.export_model(user_stream) + path_to_blob = prepare_blob_path + + with open(path_to_blob, "wb") as f_w: + f_w.write(user_stream.getbuffer()) + + with open(path_to_blob, "rb") as f_r: + new_compiled = core.import_model(f_r.read(), device) + + img = generate_image() + res = new_compiled.infer_new_request({"data": img}) + + assert np.argmax(res[new_compiled.outputs[0]]) == 531 + + @pytest.mark.parametrize("input_arguments", [[0], ["data"], []]) def test_get_input(device, input_arguments): compiled_model = generate_relu_compiled_model(device) @@ -250,7 +289,7 @@ def test_direct_infer(device, shared_flag): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_compiled_model_after_core_destroyed(request, tmp_path, device): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) model = get_relu_model() serialize(model, xml_path, bin_path) with open(bin_path, "rb") as f: @@ -267,7 +306,7 @@ def test_compiled_model_after_core_destroyed(request, tmp_path, device): def test_compiled_model_from_buffer_in_memory(request, tmp_path, device): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) model = get_relu_model() serialize(model, xml_path, bin_path) with open(bin_path, "rb") as f: diff --git 
a/src/bindings/python/tests/test_runtime/test_core.py b/src/bindings/python/tests/test_runtime/test_core.py index 8c15c2a32a05c5..d147ce2d6bcab2 100644 --- a/src/bindings/python/tests/test_runtime/test_core.py +++ b/src/bindings/python/tests/test_runtime/test_core.py @@ -28,7 +28,7 @@ get_relu_model, plugins_path, compare_models, - create_filename_for_test, + create_filenames_for_ir, get_model_with_template_extension, ) @@ -77,7 +77,7 @@ def test_core_class(device): ]) def test_compile_model(request, tmp_path, device_name): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) relu_model = get_relu_model() serialize(relu_model, xml_path, bin_path) model = core.read_model(model=xml_path, weights=bin_path) @@ -97,7 +97,7 @@ def get_model(): @pytest.fixture def get_model_path(request, tmp_path): - xml_path, _ = create_filename_for_test(request.node.name, tmp_path, True) + xml_path, _ = create_filenames_for_ir(request.node.name, tmp_path, True) serialize(get_relu_model(), xml_path) return Path(xml_path) @@ -130,7 +130,7 @@ def test_compact_api(model_type, device_name, config, request): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_read_model_from_ir(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) relu_model = get_relu_model() serialize(relu_model, xml_path, bin_path) model = core.read_model(model=xml_path, weights=bin_path) @@ -143,7 +143,7 @@ def test_read_model_from_ir(request, tmp_path): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_read_model_from_tensor(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path, is_xml_path=True, is_bin_path=True) + xml_path, bin_path = 
create_filenames_for_ir(request.node.name, tmp_path, is_xml_path=True, is_bin_path=True) relu_model = get_relu_model() serialize(relu_model, xml_path, bin_path) arr = np.ones(shape=(10), dtype=np.int8) @@ -164,7 +164,7 @@ def test_read_model_with_wrong_input(): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_read_model_as_path(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path, True, True) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path, True, True) relu_model = get_relu_model() serialize(relu_model, xml_path, bin_path) @@ -181,7 +181,7 @@ def test_read_model_as_path(request, tmp_path): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_read_model_from_buffer(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) relu_model = get_relu_model() serialize(relu_model, xml_path, bin_path) with open(bin_path, "rb") as f: @@ -195,7 +195,7 @@ def test_read_model_from_buffer(request, tmp_path): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_model_from_buffer_valid(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) relu_model = get_relu_model() serialize(relu_model, xml_path, bin_path) with open(bin_path, "rb") as f: diff --git a/src/bindings/python/tests/test_runtime/test_model.py b/src/bindings/python/tests/test_runtime/test_model.py index 62b1eac9da3865..0ae592b2d1dff5 100644 --- a/src/bindings/python/tests/test_runtime/test_model.py +++ b/src/bindings/python/tests/test_runtime/test_model.py @@ -30,7 +30,7 @@ from tests.utils.helpers import ( generate_add_model, generate_model_with_memory, - 
create_filename_for_test, + create_filenames_for_ir, ) @@ -618,7 +618,7 @@ def check_rt_info(model): assert model.get_rt_info(["optimization", "test"]) core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) input_shape = PartialShape([1]) param = ops.parameter(input_shape, dtype=np.float32, name="data") relu1 = ops.relu(param, name="relu1") @@ -701,7 +701,7 @@ def check_rt_info(model): assert rt_info_val in ["float_empty", "nodes", "type", "directed"] core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) input_shape = PartialShape([1]) param = ops.parameter(input_shape, dtype=np.float32, name="data") relu1 = ops.relu(param, name="relu1") diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index e8d3162c362f4f..6065d72196b44b 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -266,6 +266,11 @@ def test_properties_ro(ov_property_ro, expected_value): ), (props.force_tbb_terminate, "FORCE_TBB_TERMINATE", ((True, True), (False, False))), (props.enable_mmap, "ENABLE_MMAP", ((True, True), (False, False))), + ( + props.weights_path, + "WEIGHTS_PATH", + (("./model.bin", "./model.bin"),), + ), (hints.inference_precision, "INFERENCE_PRECISION_HINT", ((Type.f32, Type.f32),)), ( hints.model_priority, @@ -330,6 +335,11 @@ def test_properties_ro(ov_property_ro, expected_value): ((64, 64),), ), (hints.kv_cache_precision, "KV_CACHE_PRECISION", ((Type.f32, Type.f32),)), + ( + hints.activations_scale_factor, + "ACTIVATIONS_SCALE_FACTOR", + ((0.0, 0.0),), + ), ( intel_cpu.denormals_optimization, "CPU_DENORMALS_OPTIMIZATION", diff --git 
a/src/bindings/python/tests/test_transformations/test_compression.py b/src/bindings/python/tests/test_transformations/test_compression.py index fa46b6d227f1e3..d2754fd29d9c70 100644 --- a/src/bindings/python/tests/test_transformations/test_compression.py +++ b/src/bindings/python/tests/test_transformations/test_compression.py @@ -9,7 +9,7 @@ from openvino.runtime.opset13 import add, multiply import openvino as ov -from tests.utils.helpers import create_filename_for_test +from tests.utils.helpers import create_filenames_for_ir def make_constant(values, transposed): @@ -38,7 +38,7 @@ def make_model(add_consts, mul_consts): def get_constants(model, request, tmp_path) -> List[Constant]: - model_fname, _ = create_filename_for_test(request.node.name, tmp_path) + model_fname, _ = create_filenames_for_ir(request.node.name, tmp_path) ov.save_model(model, model_fname) core = ov.Core() restored_model = core.read_model(model_fname) diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py index cd336493b58246..e265cef4635988 100644 --- a/src/bindings/python/tests/test_transformations/test_offline_api.py +++ b/src/bindings/python/tests/test_transformations/test_offline_api.py @@ -18,7 +18,7 @@ from openvino import Model, PartialShape, Core, serialize, save_model import openvino.runtime as ov -from tests.utils.helpers import create_filename_for_test, compare_models, _compare_models +from tests.utils.helpers import create_filenames_for_ir, compare_models, _compare_models def get_relu_model(): @@ -171,10 +171,10 @@ def test_fused_names_cleanup(): def prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin): - xml_path, bin_path = create_filename_for_test(request.node.name, - tmp_path, - is_path_xml, - is_path_bin) + xml_path, bin_path = create_filenames_for_ir(request.node.name, + tmp_path, + is_path_xml, + is_path_bin) shape = [100, 100, 2] parameter_a = 
ov.opset8.parameter(shape, dtype=np.float32, name="A") parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B") @@ -267,10 +267,10 @@ def test_compress_model_transformation(): ) def test_version_default(request, tmp_path, is_path_xml, is_path_bin): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, - tmp_path, - is_path_xml, - is_path_bin) + xml_path, bin_path = create_filenames_for_ir(request.node.name, + tmp_path, + is_path_xml, + is_path_bin) shape = [100, 100, 2] parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A") parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B") @@ -297,10 +297,10 @@ def test_version_default(request, tmp_path, is_path_xml, is_path_bin): ], ) def test_serialize_default_bin(request, tmp_path, is_path_xml, is_path_bin): - xml_path, bin_path = create_filename_for_test(request.node.name, - tmp_path, - is_path_xml, - is_path_bin) + xml_path, bin_path = create_filenames_for_ir(request.node.name, + tmp_path, + is_path_xml, + is_path_bin) model = get_relu_model() serialize(model, xml_path) assert os.path.exists(bin_path) @@ -311,7 +311,7 @@ def test_serialize_default_bin(request, tmp_path, is_path_xml, is_path_bin): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_version_ir_v10(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = create_filenames_for_ir(request.node.name, tmp_path) shape = [100, 100, 2] parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A") parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B") @@ -332,7 +332,7 @@ def test_version_ir_v10(request, tmp_path): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_version_ir_v11(request, tmp_path): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) + xml_path, bin_path = 
create_filenames_for_ir(request.node.name, tmp_path) shape = [100, 100, 2] parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A") parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B") diff --git a/src/bindings/python/tests/test_transformations/test_pattern_ops.py b/src/bindings/python/tests/test_transformations/test_pattern_ops.py index 041bf83764e265..24b28061582c68 100644 --- a/src/bindings/python/tests/test_transformations/test_pattern_ops.py +++ b/src/bindings/python/tests/test_transformations/test_pattern_ops.py @@ -189,7 +189,7 @@ def test_pattern_optional_root(): def test_wrap_type_pattern_type(): - last_opset_number = 15 + last_opset_number = 16 for i in range(1, last_opset_number + 1): WrapType(f"opset{i}.Parameter") WrapType(f"opset{i}::Parameter") diff --git a/src/bindings/python/tests/test_transformations/test_public_transformations.py b/src/bindings/python/tests/test_transformations/test_public_transformations.py index 429bc6c192acc4..a10fea786b9770 100644 --- a/src/bindings/python/tests/test_transformations/test_public_transformations.py +++ b/src/bindings/python/tests/test_transformations/test_public_transformations.py @@ -17,7 +17,7 @@ ) from tests.test_transformations.utils.utils import count_ops, get_relu_model -from tests.utils.helpers import create_filename_for_test, compare_models +from tests.utils.helpers import create_filenames_for_ir, compare_models def get_model(): @@ -132,10 +132,10 @@ def test_low_latency2(): ) def test_serialize_pass(request, tmp_path, is_path_xml, is_path_bin): core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, - tmp_path, - is_path_xml, - is_path_bin) + xml_path, bin_path = create_filenames_for_ir(request.node.name, + tmp_path, + is_path_xml, + is_path_bin) model = get_relu_model() diff --git a/src/bindings/python/tests/utils/helpers.py b/src/bindings/python/tests/utils/helpers.py index 2ea00484e9840c..c14bd6e5e779da 100644 --- 
a/src/bindings/python/tests/utils/helpers.py +++ b/src/bindings/python/tests/utils/helpers.py @@ -303,7 +303,14 @@ def generate_abs_compiled_model_with_data(device, ov_type, numpy_dtype): return compiled_model, request, tensor1, array1 -def create_filename_for_test(test_name, tmp_path, is_xml_path=False, is_bin_path=False): +def create_filename_for_test(test_name): + python_version = str(sys.version_info.major) + "_" + str(sys.version_info.minor) + filename = test_name.replace("test_", "").replace("[", "_").replace("]", "_") + filename = filename + "_" + python_version + return filename + + +def create_filenames_for_ir(test_name, tmp_path, is_xml_path=False, is_bin_path=False): """Return a tuple with automatically generated paths for xml and bin files. :param test_name: Name used in generating. @@ -311,9 +318,7 @@ def create_filename_for_test(test_name, tmp_path, is_xml_path=False, is_bin_path :param is_bin_path: True if bin file should be pathlib.Path object, otherwise return string. :return: Tuple with two objects representing xml and bin files. 
""" - python_version = str(sys.version_info.major) + "_" + str(sys.version_info.minor) - filename = test_name.replace("test_", "").replace("[", "_").replace("]", "_") - filename = filename + "_" + python_version + filename = create_filename_for_test(test_name) path_to_xml = tmp_path / Path(filename + ".xml") path_to_bin = tmp_path / Path(filename + ".bin") _xml = path_to_xml if is_xml_path else str(path_to_xml) diff --git a/src/bindings/python/wheel/CMakeLists.txt b/src/bindings/python/wheel/CMakeLists.txt index 7d8ebd5256a968..d943c3caa21e52 100644 --- a/src/bindings/python/wheel/CMakeLists.txt +++ b/src/bindings/python/wheel/CMakeLists.txt @@ -116,14 +116,14 @@ if(pip_version VERSION_GREATER_EQUAL 22.0) --build-option --plat-name=${PLATFORM_TAG} "${CMAKE_CURRENT_SOURCE_DIR}") else() + # for --config-setting explanation see https://github.com/pypa/setuptools/issues/2491 set(wheel_build_command - ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" - --quiet - --no-user-cfg - bdist_wheel - --dist-dir ${openvino_wheels_output_dir} - --build-number=${WHEEL_BUILD} - --plat-name=${PLATFORM_TAG}) + ${Python3_EXECUTABLE} -m build "${CMAKE_CURRENT_SOURCE_DIR}" + --outdir ${openvino_wheels_output_dir} + --config-setting=--build-option=--build-number=${WHEEL_BUILD} + --config-setting=--build-option=--plat-name=${PLATFORM_TAG} + --config-setting=--quiet + --wheel) endif() add_custom_command(OUTPUT ${openvino_wheel_path} diff --git a/src/bindings/python/wheel/requirements-dev.txt b/src/bindings/python/wheel/requirements-dev.txt index c88d91751ebbc9..a6c2dcbdf8fee1 100644 --- a/src/bindings/python/wheel/requirements-dev.txt +++ b/src/bindings/python/wheel/requirements-dev.txt @@ -1,4 +1,5 @@ -c ../constraints.txt setuptools wheel +build patchelf; sys_platform == 'linux' and platform_machine == 'x86_64' diff --git a/src/cmake/ov_parallel.cmake b/src/cmake/ov_parallel.cmake index 1c10f1c121d8bc..110e7fe185f63f 100644 --- a/src/cmake/ov_parallel.cmake +++ 
b/src/cmake/ov_parallel.cmake @@ -23,7 +23,7 @@ function(_ov_get_tbb_location tbb_target _tbb_lib_location_var) get_target_property(_imported_configs ${target} IMPORTED_CONFIGURATIONS) if(NOT _imported_configs) # if IMPORTED_CONFIGURATIONS property is not set, then set a common list - set(_imported_configs RELEASE NONE) + set(_imported_configs RELEASE DEBUG NONE) if(NOT OV_GENERATOR_MULTI_CONFIG) string(TOUPPER ${CMAKE_BUILD_TYPE} _build_type) list(APPEND _imported_configs ${_build_type}) diff --git a/src/common/low_precision_transformations/src/mat_mul.cpp b/src/common/low_precision_transformations/src/mat_mul.cpp index 15afe2408cc459..705f3d400a098c 100644 --- a/src/common/low_precision_transformations/src/mat_mul.cpp +++ b/src/common/low_precision_transformations/src/mat_mul.cpp @@ -160,7 +160,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ov::pass::p } const auto newMulConst = NetworkHelper::toScalarIfPossible(fold( - mulConst1, + foldConvert(mulConst1, element::f32), foldConvert(mulConst2, element::f32))); const auto newMultiply = std::make_shared>( diff --git a/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp b/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp index 454802c965f945..8425db398085ae 100644 --- a/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp @@ -157,6 +157,22 @@ std::vector testValues = { {}, {}}}, + // test: multiply with f16 constant + {LayerTransformation::createParamsU8I8(), + {ov::element::u8, + {ov::element::f32, {}, ov::builder::subgraph::DequantizationOperations::Multiply{0.02f}.setConstantPrecision(ov::element::f16)}, + {std::vector(1024 * 1024, 1.f), ov::element::i8, ov::Shape{1024, 1024}}, + {}, + {ov::element::f32, {}, {0.1f}}, + }, + {ov::element::u8, + {}, + {std::vector(1024 * 1024, 1.f), 
ov::element::i8, ov::Shape{1024, 1024}}, + ov::element::u8, + {{}, {}, {0.02f * 0.1f}}, + {}, + {}}}, + // supported 3D: U8 & I8 with Dq on weights {LayerTransformation::createParamsU8I8(), { diff --git a/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp b/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp index 3dcd98ef0a95fd..9174d9866db503 100644 --- a/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expressions/buffer_expression.hpp @@ -38,6 +38,7 @@ class BufferExpression : public Expression { size_t get_offset() const { return m_offset; } size_t get_allocation_size() const { return m_allocation_size; } size_t get_byte_size() const; + ov::element::Type get_data_type() const; void set_reg_group(size_t reg_group) { m_reg_group = reg_group; } void set_cluster_id(size_t cluster) { m_cluster_id = cluster; } diff --git a/src/common/snippets/include/snippets/lowered/loop_info.hpp b/src/common/snippets/include/snippets/lowered/loop_info.hpp index cc66f5f6ffcc95..23e1f14a8b7f5e 100644 --- a/src/common/snippets/include/snippets/lowered/loop_info.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_info.hpp @@ -211,13 +211,20 @@ class UnifiedLoopInfo : public LoopInfo { int64_t data_size = 0; bool is_dynamic() const; + bool is_static() const; + + friend bool operator==(const LoopPortDesc& lhs, const LoopPortDesc& rhs); + friend bool operator!=(const LoopPortDesc& lhs, const LoopPortDesc& rhs); }; // The structure describes full information about port // - TODO [140365] : UnifiedLoopInfo should have the map of LoopPorts and LoopDesc as class field // instead of the separate vectors with descriptors. 
struct LoopPortInfo { - LoopPort port; - LoopPortDesc desc; + LoopPortInfo() = default; + LoopPortInfo(LoopPort port_, LoopPortDesc desc_) : port(std::move(port_)), desc(std::move(desc_)) {} + + LoopPort port = {}; + LoopPortDesc desc = {}; }; UnifiedLoopInfo() = default; @@ -367,6 +374,12 @@ class UnifiedLoopInfo : public LoopInfo { caller(m_output_ports[i], m_output_port_descs[i]); } + /** + * @brief Return loop port info of an expression port + * @param expr_port - expression port. + */ + LoopPortInfo get_loop_port_info(const ExpressionPort& expr_port); + protected: /** * @brief Clone LoopPortDesc[actual_port_idx] `new_count` times and insert on the place of current desc diff --git a/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp b/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp index 1597eaa2377a50..312abb02abf7b5 100644 --- a/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/define_buffer_clusters.hpp @@ -6,6 +6,8 @@ #include "pass.hpp" +#include "snippets/lowered/loop_info.hpp" + namespace ov { namespace snippets { namespace lowered { @@ -45,7 +47,7 @@ class DefineBufferClusters : public RangedPass { private: using BufferCluster = std::set; using BufferClusters = std::vector; - using BufferPorts = std::unordered_map>; + using BufferMap = std::unordered_map; /** * @brief Finds Buffer cluster in set of clusters which contains the target expression with Buffer * @param target target expression with Buffer op @@ -58,76 +60,72 @@ class DefineBufferClusters : public RangedPass { * @param target_expr expression with target op - LoopEnd or MemoryAccess op * @return boolean value */ - bool is_direct_buffer(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const; + static bool is_direct_buffer(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& target_expr); /** * @brief Creates new buffer cluster 
if buffer_exprs is missed in clusters. If buffer_exprs is already in clusters, do nothing * @param buffer_expr expression with Buffer op */ void create_new_cluster(const BufferExpressionPtr& buffer_expr); + /** + * @brief Add buffers to the existing clusters + * @param existing_cluster existing clusters + * @param buffers buffers which will be added to the existing cluster + */ + static void add_buffers_to_cluster(BufferCluster& existing_cluster, const std::set& buffers); /** * @brief Returns common ID of cluster if all buffer inside have the same Buffer ID. Otherwise returns the default value SIZE_MAX * that means that Buffers in cluster have different IDs. * @param cluster set of Buffer expressions - cluster * @return common buffer ID or SIZE_MAX - size value */ - size_t get_cluster_buffer_id(const BufferCluster& cluster) const; + static size_t get_cluster_buffer_id(const BufferCluster& cluster); /** * @brief Analyzes Loop: if Loop has Buffer ops on inputs and outputs, Loop can read and write from/to the same memory. + * @param loop_manager loop manager * @param expr_it iterator of Linear IR which refers to the expression with LoopEnd */ - void parse_loop(const LinearIR::constExprIt& expr_it); + void parse_loop(const LoopManagerPtr& loop_manager, const LinearIR::constExprIt& expr_it); /** * @brief Analyzes full MemoryAccess op: if the op has Buffer ops on I/O, the op can read and write from/to the same memory. 
* @param expr expression with full MemoryAccess op */ void parse_memory_access_op(const ExpressionPtr& expr); /** - * @brief Gets input outputs buffers of Loop - * @param loop_expr expression with LoopEnd op - * @return unordered map [Expression -> set of input ports] which represents input Buffers of Loop + * @brief Find all direct buffers that are connected to the current Loop + * @param loop_info current unified loop info + * @param loop_expr the target LoopEnd expression + * @return input and output buffer maps */ - BufferPorts get_input_buffers(const ExpressionPtr& loop_expr) const; - /** - * @brief Gets output buffers of Loop - * @param loop_expr expression with LoopEnd op - * @return unordered map [Expression -> set of input ports] which represents output Buffers of Loop - */ - BufferPorts get_output_buffers(const ExpressionPtr& loop_expr) const; + static std::pair get_direct_buffers(const UnifiedLoopInfoPtr& loop_info, const ExpressionPtr& loop_expr); /** * @brief Analyzes nested Loops: unite nested buffer clusters if they can reproduce `window` sliding - * @param input_buffers unordered map [Expression -> set of input ports] which represents input Buffers of Loop - * @param output_buffers unordered map [Expression -> set of output ports (one)] which represents output Buffers of Loop + * @param loop_manager loop manager + * @param input_buffers unordered map [Expression -> LoopPortInfo] which represents input Buffers of Loop + * @param output_buffers unordered map [Expression -> LoopPortInfo] which represents output Buffers of Loop * @param outer_loop_end_expr_it iterator of Linear IR which refers to the expression with outer LoopEnd */ - void parse_nested_loops(const BufferPorts& input_buffers, const BufferPorts& output_buffers, const LinearIR::constExprIt& outer_loop_end_expr_it); + void parse_nested_loops(const LoopManagerPtr& loop_manager, const BufferMap& input_buffers, const BufferMap& output_buffers, + const LinearIR::constExprIt& 
outer_loop_end_expr_it); /** - * @brief Finds the last connected Loop to the target Buffer and returns the corresponding finalization offset + * @brief Finds the last connected Loop to the target Buffer and init the corresponding loop port info + * @param loop_manager loop manager * @param buffer_expr expression with Buffer op - * @return finalization offset - int64_t value - */ - int64_t get_buffer_finalization_offset(const BufferExpressionPtr& buffer_expr) const; - /** - * @brief Check if two Buffer expressions are connected to the same Loop. Set common LoopEnd as `loop` parameter and - * indexes of Loop ports `up_idx` and `down_idx` if Buffers are really neighbours - * @param up expression with upper Buffer op - * @param down expression with lower Buffer op - * @param loop expression with common LoopEnd op - * @param up_idx the reference to port index of upper Buffer op to the Loop - * @param down_idx the reference to port index of lower Buffer op to the Loop - * @return Return True if the Buffers are connected to the same Loop + * @param port_info target loop port info to be initialized + * @return status - True if loop port has been found. Otherwise, return false - not connected to the Loop. 
*/ - static bool are_buffer_neighbours(const BufferExpressionPtr& up, const BufferExpressionPtr& down, ExpressionPtr& loop, - size_t& up_idx, size_t& down_idx); + static bool init_buffer_last_loop_port_info(const LoopManagerPtr& loop_manager, const BufferExpressionPtr& buffer_expr, + UnifiedLoopInfo::LoopPortInfo& port_info); /** * @brief Unite clusters + * @param loop_manager loop manager * @param inner_cluster_it iterator to inner cluster - buffer cluster is in the loop * @param outer_cluster buffer clusters with buffers outside the Loop * @param outer_buffer target Buffer from outer_cluster * @param is_outer_up true if outer buffer is upper in Linear IR than inner Buffers * @return Return True if clusters have been united */ - bool unite_nested_clusters(const BufferClusters::iterator& inner_cluster_it, BufferCluster& outer_cluster, + bool unite_nested_clusters(const LoopManagerPtr& loop_manager, const BufferClusters::iterator& inner_cluster_it, BufferCluster& outer_cluster, const BufferExpressionPtr& outer_buffer, bool is_outer_up); BufferClusters m_clusters; diff --git a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp index 169dbd30e35cc2..e94e1977974716 100644 --- a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp @@ -25,7 +25,7 @@ class InitLoops : public Pass { bool run(LinearIR& linear_ir) override; private: - static void update_compile_parameters(const UnifiedLoopInfoPtr& loop_info, size_t loop_id); + static void update_compile_parameters(const UnifiedLoopInfoPtr& loop_info); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/lowered/pass/mark_invariant_shape_path.hpp b/src/common/snippets/include/snippets/lowered/pass/mark_invariant_shape_path.hpp new file mode 100644 index 00000000000000..6a31a697baca77 --- /dev/null +++ 
b/src/common/snippets/include/snippets/lowered/pass/mark_invariant_shape_path.hpp @@ -0,0 +1,65 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface MarkInvariantShapePath + * @brief The helper pass for BufferAllocation pipeline: + * - Many buffer-related passes (SetBufferRegGroup, DefineBufferClusters) depend on loop pointer increments. + * The increments are unknown in the dynamic case, so these passes can't set reg groups and clusters efficiently. + * The current pass marks expression ports which will have the same shape. The same shape and layout mean + * the same loop pointer arithmetic at runtime. + * @ingroup snippets + */ +class MarkInvariantShapePath: public RangedPass { +public: + OPENVINO_RTTI("MarkInvariantShapePath", "RangedPass") + MarkInvariantShapePath() = default; + + /** + * @brief Apply the pass to the Linear IR + * @param linear_ir the target Linear IR + * @return status of the pass + */ + bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; + + /** + * @brief Returns ID (color) of the current Invariant Shape path for the passed port. + * Ports which have the same IDs of the paths - will have the same shapes in runtime. + * Note: if the passed port is an input port, the method returns the value for the source of the port connector + * of the passed port, because the shape is created by output ports of expressions. + * @param port target expression port + * @return ID + */ + static size_t getInvariantPortShapePath(const ExpressionPort& port); + +private: + /** + * @brief Sets ID (color) of the current Invariant Shape path for the passed output port. + * Ports which have the same IDs of the paths - will have the same shapes in runtime.
+ * @param port target expression port + * @param value ID of the path (color) + */ + static void SetInvariantPortShapePath(const ExpressionPort& port, size_t value); + + /** + * @brief Return runtime info for the passed expression port + * @param port target expression port + * @return runtime info map + */ + static ov::RTMap& get_rt_info(const ExpressionPort& port); +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp b/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp index cba3f28856be42..dc22ce4beff1a0 100644 --- a/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/set_buffer_reg_group.hpp @@ -6,6 +6,7 @@ #include "pass.hpp" +#include "snippets/lowered/loop_info.hpp" #include "snippets/utils/utils.hpp" namespace ov { @@ -38,34 +39,19 @@ class SetBufferRegGroup: public RangedPass { * @param linear_ir the target Linear IR * @return status of the pass */ - bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; - - struct ShiftPtrParams { - ShiftPtrParams() = default; - ShiftPtrParams(int64_t ds, int64_t pi, int64_t fo) : data_size(ds), ptr_increment(pi), finalization_offset(fo) {} - int64_t data_size = 0; - int64_t ptr_increment = 0; - int64_t finalization_offset = 0; - - inline bool is_static() const { - return !utils::is_dynamic_value(ptr_increment) && !utils::is_dynamic_value(finalization_offset); - } - - friend bool operator==(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs); - friend bool operator!=(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs); - }; + bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override; /** - * @brief Check if two Buffers can be in one register group by ShiftPtrParams < data_size, ptr_increment, 
finalization_offset > - * @param lhs Data pointer shift params for first Buffer - * @param rhs Data pointer shift params for second Buffer + * @brief Check if two Buffers can be in one register group by LoopDesc < data_size, ptr_increment, finalization_offset > + * @param lhs LoopPortInfo (Port and Data pointer shift params for first Buffer) + * @param rhs LoopPortInfo (Port and Data pointer shift params for second Buffer) * @return Returns True if params are valid to reuse one register. Otherwise returns False */ - static bool can_be_in_one_group(const ShiftPtrParams& lhs, const ShiftPtrParams& rhs); + static bool can_be_in_one_reg_group(const UnifiedLoopInfo::LoopPortInfo& lhs, const UnifiedLoopInfo::LoopPortInfo& rhs); private: using BufferPool = std::vector; - using BufferMap = std::map; + using BufferMap = std::map; /** * @brief Get Buffer Index in Buffer set @@ -76,11 +62,14 @@ class SetBufferRegGroup: public RangedPass { static size_t get_buffer_idx(const BufferExpressionPtr& target, const BufferPool& pool); /** * @brief Create adjacency matrix for Buffer system. See comment in the method for more details. 
- * @param linear_ir the target Linear IR + * @param loop_manager the loop manager + * @param begin begin iterator + * @param end end iterator * @param pool set of Buffers from the Linear IR * @return adjacency matrix where True value means that Buffers are adjacent and cannot have the same ID */ - static std::vector create_adjacency_matrix(lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end, const BufferPool& pool); + static std::vector create_adjacency_matrix(const LoopManagerPtr& loop_manager, LinearIR::constExprIt begin, LinearIR::constExprIt end, + const BufferPool& pool); /** * @brief Algorithm of Graph coloring where vertices are Buffers * @param buffers set of Buffers from the Linear IR @@ -99,25 +88,23 @@ class SetBufferRegGroup: public RangedPass { * @param buffers set of Buffers from the Linear IR * @param adj Target adjacency matrix */ - static void update_adj_matrix(const std::pair& lhs, - const std::pair& rhs, - const BufferPool& buffers, + static void update_adj_matrix(const BufferMap::value_type& lhs, const BufferMap::value_type& rhs, const BufferPool& buffers, std::vector& adj); + /** * @brief Check if two Buffers are adjacent and cannot have the same ID - * @param lhs Pair where first value is Expression with first Buffer and second value is data pointer shift params for it - * @param rhs Pair where first value is Expression with second Buffer and second value is data pointer shift params for it + * @param lhs LoopPortInfo (Port and Data pointer shift params for first Buffer) + * @param rhs LoopPortInfo (Port and Data pointer shift params for second Buffer) * @return Returns True if they are adjacent, otherwise returns False */ - static bool are_adjacent(const std::pair& lhs, - const std::pair& rhs); + static bool are_adjacent(const BufferMap::value_type& lhs, const BufferMap::value_type& rhs); /** - * @brief Find all buffers that are connected to the current LoopEnd - * @param loop_end_expr expression of the target LoopEnd - 
* @return buffer map [buffer expr -> ShiftDataPtrs] + * @brief Find all buffers that are connected to the current Loop + * @param loop_info current unified loop info + * @return buffer map */ - static BufferMap get_buffer_loop_neighbours(const ExpressionPtr& loop_end_expr); + static BufferMap get_buffer_loop_neighbours(const UnifiedLoopInfoPtr& loop_info); /** * @brief Find all buffers that are inside the current Loop. * @param loop_end_it expression iterator in LinearIR of the target LoopEnd diff --git a/src/common/snippets/include/snippets/lowered/pass/validate_unified_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/validate_unified_loops.hpp index 80c1b7be19d1f0..d78aaaa668363e 100644 --- a/src/common/snippets/include/snippets/lowered/pass/validate_unified_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/validate_unified_loops.hpp @@ -6,6 +6,9 @@ #include "pass.hpp" +#include "snippets/lowered/loop_manager.hpp" + + namespace ov { namespace snippets { namespace lowered { @@ -27,6 +30,10 @@ class ValidateUnifiedLoops : public Pass { OPENVINO_RTTI("ValidateUnifiedLoops", "Pass") ValidateUnifiedLoops() = default; bool run(LinearIR& linear_ir) override; + +private: + static void validate_loop_infos(const LoopManagerPtr& loop_manager); + static void validate_loop_port_presence(const LinearIR& linear_ir); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/utils/loop_utils.hpp b/src/common/snippets/include/snippets/utils/loop_utils.hpp index b0597008adc1c4..c9f9412ff5c574 100644 --- a/src/common/snippets/include/snippets/utils/loop_utils.hpp +++ b/src/common/snippets/include/snippets/utils/loop_utils.hpp @@ -21,6 +21,12 @@ void update_data_pointer_shifts(const ov::snippets::lowered::UnifiedLoopInfoPtr& * @brief Updates work amount and updates data pointer shifts of the provided "loop_info" */ void update_runtime_parameters(const ov::snippets::lowered::UnifiedLoopInfoPtr& loop_info); +/** + * @brief Check if the passed 
expression port should be a port of the Loop with ID `loop_id`: + * the target expression port should be connected to an expression from another Loop (i.e. one that is absent from the Loop with ID `loop_id`). + */ +bool should_be_loop_port(const ov::snippets::lowered::ExpressionPort& port, size_t loop_id); + } // namespace utils } // namespace snippets } // namespace ov \ No newline at end of file diff --git a/src/common/snippets/include/snippets/utils/utils.hpp b/src/common/snippets/include/snippets/utils/utils.hpp index f7e584d48a905c..ff4646f24d03b7 100644 --- a/src/common/snippets/include/snippets/utils/utils.hpp +++ b/src/common/snippets/include/snippets/utils/utils.hpp @@ -91,6 +91,12 @@ static inline auto rnd_up(const T lhs, const U rhs) -> decltype(div_up(lhs, rhs) return div_up_res * rhs; } +static inline bool is_planar_layout(const std::vector& order) { + for (size_t i = 0; i < order.size(); ++i) + if (order[i] != i) return false; + return true; +} + inline bool is_dynamic_vdims(const VectorDims& shape) { return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return is_dynamic_value(v); }); } diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index aaa71612cef706..1952b93017aab5 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -170,11 +170,6 @@ ExpressionPtr Expression::clone() const { } bool Expression::visit_attributes(AttributeVisitor &visitor) { - auto is_planar_layout = [](const std::vector& layout) { - for (size_t i = 0; i < layout.size(); ++i) - if (layout[i] != i) return false; - return true; - }; auto subtensor2str = [](const VectorDims& subtensor) { std::stringstream ss; for (size_t i = 0; i < subtensor.size(); ++i) { @@ -203,7 +198,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) { subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor)); const auto& layout = desc->get_layout(); - if
(!layout.empty() && !is_planar_layout(layout)) + if (!layout.empty() && !utils::is_planar_layout(layout)) layouts.emplace_back("in_layout_" + std::to_string(i), layout); in_reg_types.emplace_back(regTypeToStr(desc->get_reg().type)); @@ -220,7 +215,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) { subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor)); const auto& layout = desc->get_layout(); - if (!layout.empty() && !is_planar_layout(layout)) + if (!layout.empty() && !utils::is_planar_layout(layout)) layouts.emplace_back("out_layout_" + std::to_string(i), layout); out_reg_types.emplace_back(regTypeToStr(desc->get_reg().type)); diff --git a/src/common/snippets/src/lowered/expressions/buffer_expression.cpp b/src/common/snippets/src/lowered/expressions/buffer_expression.cpp index acc742ff196407..a8b3bb2034b105 100644 --- a/src/common/snippets/src/lowered/expressions/buffer_expression.cpp +++ b/src/common/snippets/src/lowered/expressions/buffer_expression.cpp @@ -25,12 +25,15 @@ ExpressionPtr BufferExpression::clone() const { } bool BufferExpression::visit_attributes(AttributeVisitor &visitor) { + Expression::visit_attributes(visitor); auto allocation_size = utils::value2str(m_allocation_size); auto offset = utils::value2str(m_offset); + auto prc = get_data_type(); visitor.on_attribute("allocation_size", allocation_size); visitor.on_attribute("offset", offset); visitor.on_attribute("reg_group", m_reg_group); visitor.on_attribute("cluster_id", m_cluster_id); + visitor.on_attribute("data_type", prc); return true; } @@ -38,9 +41,13 @@ bool BufferExpression::is_defined() const { return !utils::is_dynamic_value(m_allocation_size); } +ov::element::Type BufferExpression::get_data_type() const { + return get_node()->get_output_element_type(0); +} + size_t BufferExpression::get_byte_size() const { if (is_defined()) - return m_allocation_size * get_node()->get_output_element_type(0).size(); + return m_allocation_size * 
get_data_type().size(); return utils::get_dynamic_value(); } diff --git a/src/common/snippets/src/lowered/loop_info.cpp b/src/common/snippets/src/lowered/loop_info.cpp index 954cc180a1527b..1c856869878b80 100644 --- a/src/common/snippets/src/lowered/loop_info.cpp +++ b/src/common/snippets/src/lowered/loop_info.cpp @@ -99,7 +99,7 @@ template<> std::vector::iterator LoopInfo::find_loop_port(const ExpressionPort& expr_port) { auto& ports = expr_port.get_type() == ExpressionPort::Input ? m_input_ports : m_output_ports; const auto it = std::find_if(ports.begin(), ports.end(), - [&expr_port](const LoopPort& port) { return *port.expr_port.get() == expr_port; }); + [&expr_port](const LoopPort& port) { return *port.expr_port == expr_port; }); return it; } @@ -176,6 +176,19 @@ bool UnifiedLoopInfo::LoopPortDesc::is_dynamic() const { return utils::is_dynamic_value(ptr_increment) || utils::is_dynamic_value(finalization_offset); } +bool UnifiedLoopInfo::LoopPortDesc::is_static() const { + return !is_dynamic(); +} + +bool operator==(const UnifiedLoopInfo::LoopPortDesc& lhs, const UnifiedLoopInfo::LoopPortDesc& rhs) { + if (&lhs == &rhs) + return true; + return lhs.ptr_increment == rhs.ptr_increment && lhs.finalization_offset == rhs.finalization_offset && lhs.data_size == rhs.data_size; +} +bool operator!=(const UnifiedLoopInfo::LoopPortDesc& lhs, const UnifiedLoopInfo::LoopPortDesc& rhs) { + return !(rhs == lhs); +} + UnifiedLoopInfo::UnifiedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const std::vector& exits, const SpecificIterationHandlers& handlers) @@ -321,6 +334,18 @@ void UnifiedLoopInfo::sort_ports() { reorder(m_output_ports, m_output_port_descs); } +UnifiedLoopInfo::LoopPortInfo UnifiedLoopInfo::get_loop_port_info(const ExpressionPort& expr_port) { + OPENVINO_ASSERT(is_loop_port(expr_port), "Failed get_loop_port: expr_port is not a loop port"); + const auto is_input = expr_port.get_type() == ExpressionPort::Input; + const auto& ports = 
is_input ? m_input_ports : m_output_ports; + const auto& descs = is_input ? m_input_port_descs : m_output_port_descs; + const auto it = std::find_if(ports.begin(), ports.end(), + [&expr_port](const LoopPort& port) { return *port.expr_port == expr_port; }); + const auto index = static_cast(std::distance(ports.cbegin(), it)); + OPENVINO_ASSERT(index < ports.size() && index < descs.size(), "LoopPortInfo has not been found!"); + return {ports[index], descs[index]}; +} + void UnifiedLoopInfo::replace_with_cloned_descs(size_t actual_port_idx, size_t new_count, bool is_input) { auto& descs = is_input ? m_input_port_descs : m_output_port_descs; std::vector target_shifts(new_count, descs[actual_port_idx]); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 21f4ecc83c57b0..f0e5306c5878bc 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -11,6 +11,7 @@ #include "openvino/core/graph_util.hpp" #include "openvino/core/type.hpp" +#include "snippets/utils/loop_utils.hpp" #include "snippets/itt.hpp" @@ -349,30 +350,45 @@ void LoopManager::fuse_loop_ports(std::vector& output_ports, } void LoopManager::update_loop_ports(const ExpressionPtr& expr) { - auto output_ports = expr->get_output_ports(); - for (size_t i = 0; i < expr->get_input_count(); ++i) { - const auto& source = expr->get_input_port_connector(i)->get_source(); - const auto common_outer_loop_ids = get_common_outer_loops(expr, source.get_expr()); - // The source output port can have several consumers (including the current expr) that can be potential output ports - // So we should verify on the possible future output ports - size_t count_of_common_outer_loops = common_outer_loop_ids.size(); - for (const auto& source_consumer : source.get_connected_ports()) { - if (source_consumer.get_expr() == expr) + auto update_ports = [&](const ov::snippets::lowered::ExpressionPort& connected_port) { + 
const auto is_output = connected_port.get_type() == ExpressionPort::Output; + // Iterate through all Loops of the connected expression + for (const auto& loop_id : connected_port.get_expr()->get_loop_ids()) { + const auto& loop_info = get_loop_info(loop_id); + // If the connected expression port is not Loop port - nothing to update + // If the target expression is not from the same Loop - nothing to update + if (!loop_info->is_loop_port(connected_port) || !is_loop_id_found(expr, loop_id)) continue; - count_of_common_outer_loops = std::min(count_of_common_outer_loops, get_common_outer_loops(source.get_expr(), source_consumer.get_expr()).size()); - } - replace_loop_ports({common_outer_loop_ids.cbegin(), common_outer_loop_ids.cbegin() + count_of_common_outer_loops}, source, output_ports); - // Save previous port - if (count_of_common_outer_loops != common_outer_loop_ids.size()) { - output_ports.insert(output_ports.begin(), source); - replace_loop_ports({common_outer_loop_ids.cbegin() + count_of_common_outer_loops, common_outer_loop_ids.cend()}, source, output_ports); + + std::vector new_ports; + // Check if some ports of target expression must be Loop port + const auto target_expr_ports = is_output ? 
expr->get_output_ports() : expr->get_input_ports(); + for (const auto& port : target_expr_ports) { + if (utils::should_be_loop_port(port, loop_id)) + new_ports.push_back(port); + } + // Leave the connected expression port as Loop port if needed + if (utils::should_be_loop_port(connected_port, loop_id)) + new_ports.push_back(connected_port); + + // Nothing should be updated + if (new_ports.size() == 1 && new_ports.front() == connected_port) + continue; + + loop_info->replace_with_new_ports(connected_port, new_ports); } + }; + + // The case with parent loops: source -> target expr + for (size_t i = 0; i < expr->get_input_count(); ++i) { + update_ports(expr->get_input_port_connector(i)->get_source()); } - const auto input_ports = expr->get_input_ports(); + + // The case with child loops: target expr -> consumers for (size_t i = 0; i < expr->get_output_count(); ++i) { const auto& consumers = expr->get_output_port_connector(i)->get_consumers(); for (const auto& consumer : consumers) { - replace_loop_ports(get_common_outer_loops(expr, consumer.get_expr()), consumer, input_ports); + update_ports(consumer); } } } diff --git a/src/common/snippets/src/lowered/loop_port.cpp b/src/common/snippets/src/lowered/loop_port.cpp index 990b2801beccb8..52f59bb1fa4d35 100644 --- a/src/common/snippets/src/lowered/loop_port.cpp +++ b/src/common/snippets/src/lowered/loop_port.cpp @@ -30,7 +30,7 @@ std::shared_ptr LoopPort::clone_with_new_expr(const ExpressionPtr& new bool operator==(const LoopPort& lhs, const LoopPort& rhs) { if (&lhs == &rhs) return true; - return lhs.expr_port == rhs.expr_port && lhs.is_incremented == rhs.is_incremented && lhs.dim_idx == rhs.dim_idx; + return *lhs.expr_port == *rhs.expr_port && lhs.is_incremented == rhs.is_incremented && lhs.dim_idx == rhs.dim_idx; } bool operator!=(const LoopPort& lhs, const LoopPort& rhs) { @@ -38,8 +38,8 @@ bool operator!=(const LoopPort& lhs, const LoopPort& rhs) { } bool operator<(const LoopPort& lhs, const LoopPort& rhs) { - return 
(lhs.expr_port < rhs.expr_port) || - (lhs.expr_port == rhs.expr_port && + return (*lhs.expr_port < *rhs.expr_port) || + (*lhs.expr_port == *rhs.expr_port && (lhs.is_incremented < rhs.is_incremented || (lhs.is_incremented == rhs.is_incremented && lhs.dim_idx < rhs.dim_idx))); } diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index f76c4097b38f38..0269c65109b701 100644 --- a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -12,7 +12,7 @@ #include "snippets/lowered/pass/define_buffer_clusters.hpp" #include "snippets/lowered/pass/normalize_buffer_reg_groups.hpp" #include "snippets/lowered/pass/propagate_buffer_offset.hpp" -#include "snippets/pass/tokenization.hpp" +#include "snippets/lowered/pass/mark_invariant_shape_path.hpp" #include "snippets/itt.hpp" #include "snippets/utils/utils.hpp" @@ -30,6 +30,7 @@ bool AllocateBuffers::run(lowered::LinearIR& linear_ir, lowered::LinearIR::const PassPipeline pipeline; pipeline.register_pass(); if (m_is_optimized_mode) { + pipeline.register_pass(); pipeline.register_pass(); pipeline.register_pass(); pipeline.register_pass(buffer_scratchpad_size); diff --git a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp index c43b5d63a358c6..e8132d62be0cc9 100644 --- a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp +++ b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp @@ -4,24 +4,42 @@ #include "snippets/lowered/pass/define_buffer_clusters.hpp" +#include "snippets/lowered/pass/mark_invariant_shape_path.hpp" #include "snippets/lowered/pass/set_buffer_reg_group.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/utils/utils.hpp" #include "snippets/itt.hpp" + namespace ov { namespace snippets { namespace lowered { 
namespace pass { -using ShiftPtrParams = SetBufferRegGroup::ShiftPtrParams; +namespace { + +// Find Loops which are connected to the current `buffer_expr` (consumer of Buffer is port of these Loops) +std::vector get_connected_loops(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& consumer_expr) { + // [133463] Remove it please + if (ov::is_type(consumer_expr->get_node())) + return {}; + const auto& buffer_loops_ids = buffer_expr->get_loop_ids(); + const auto& consumer_loop_ids = consumer_expr->get_loop_ids(); + OPENVINO_ASSERT(buffer_loops_ids.size() <= consumer_loop_ids.size(), "Buffer with consumer are in incorrect loops"); + const auto mismatched_its = std::mismatch(buffer_loops_ids.begin(), buffer_loops_ids.end(), consumer_loop_ids.begin()); + return {mismatched_its.second, consumer_loop_ids.cend()}; +} +} // namespace + +using LoopPortInfo = UnifiedLoopInfo::LoopPortInfo; DefineBufferClusters::BufferClusters::iterator DefineBufferClusters::find_cluster_by_expr(const BufferExpressionPtr& target) { return std::find_if(m_clusters.begin(), m_clusters.end(), [&target](const BufferCluster& cluster) { return cluster.count(target) > 0; }); } -bool DefineBufferClusters::is_direct_buffer(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const { +bool DefineBufferClusters::is_direct_buffer(const BufferExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) { return buffer_expr && buffer_expr->get_loop_ids() == target_expr->get_loop_ids(); } @@ -33,7 +51,16 @@ void DefineBufferClusters::create_new_cluster(const BufferExpressionPtr& buffer_ } } -size_t DefineBufferClusters::get_cluster_buffer_id(const BufferCluster& cluster) const { +void DefineBufferClusters::add_buffers_to_cluster(BufferCluster& existing_cluster, const std::set& buffers) { + existing_cluster.insert(buffers.cbegin(), buffers.cend()); + // All buffers in one cluster must be only static or dynamic (no mixes). 
+ if (std::any_of(existing_cluster.cbegin(), existing_cluster.cend(), [](const BufferExpressionPtr& buffer) { return !buffer->is_defined(); })) { + for (const auto& buffer : existing_cluster) + buffer->set_allocation_size(utils::get_dynamic_value()); + } +} + +size_t DefineBufferClusters::get_cluster_buffer_id(const BufferCluster& cluster) { OPENVINO_ASSERT(!cluster.empty(), "Buffer cluster is empty!"); const auto id = cluster.cbegin()->get()->get_reg_group(); if (std::all_of(cluster.cbegin(), cluster.cend(), [&id](const BufferExpressionPtr& expr) { return expr->get_reg_group() == id; })) { @@ -42,141 +69,121 @@ size_t DefineBufferClusters::get_cluster_buffer_id(const BufferCluster& cluster) return SIZE_MAX; } -DefineBufferClusters::BufferPorts DefineBufferClusters::get_input_buffers(const ExpressionPtr& loop_expr) const { - BufferPorts input_buffers; - - const auto loop_end = ov::as_type_ptr(loop_expr->get_node()); - const auto in_count = loop_end->get_input_num(); - const auto& connectors = loop_expr->get_input_port_connectors(); - - // Input Buffers - for (size_t i = 0; i < in_count; ++i) { - const auto& source_expr = ov::as_type_ptr(connectors[i]->get_source().get_expr()); - if (!is_direct_buffer(source_expr, loop_expr)) +std::pair DefineBufferClusters::get_direct_buffers(const UnifiedLoopInfoPtr& loop_info, + const ExpressionPtr& loop_expr) { + BufferMap input_buffers; + const auto& loop_inputs = loop_info->get_input_ports_info(); + for (const auto& port_info : loop_inputs) { + const auto& buffer_expr = ov::as_type_ptr(port_info.port.expr_port->get_port_connector_ptr()->get_source().get_expr()); + if (!is_direct_buffer(buffer_expr, loop_expr)) + continue; + if (input_buffers.count(buffer_expr) > 0) { + const auto& port_desc = port_info.desc; + OPENVINO_ASSERT(input_buffers[buffer_expr].desc == port_desc, + "Invalid data pointer shifts: If Buffer has several consumers, this consumers must have the same shifts or zero"); continue; - // Save as input Buffer - 
const auto ret = input_buffers.insert(std::make_pair(source_expr, std::set{ i })).second; - if (!ret) - input_buffers[source_expr].insert(i); + } + input_buffers[buffer_expr] = port_info; } - return input_buffers; -} - -DefineBufferClusters::BufferPorts DefineBufferClusters::get_output_buffers(const ExpressionPtr& loop_expr) const { - BufferPorts output_buffers; - const auto loop_end = ov::as_type_ptr(loop_expr->get_node()); - const auto in_count = loop_end->get_input_num(); - const auto out_count = loop_end->get_output_num(); - const auto& connectors = loop_expr->get_input_port_connectors(); - - for (size_t i = in_count; i < in_count + out_count; ++i) { - for (const auto& consumer : connectors[i]->get_consumers()) { - const auto& consumer_expr = ov::as_type_ptr(consumer.get_expr()); - if (!is_direct_buffer(consumer_expr, loop_expr)) + BufferMap output_buffers; + const auto& loop_outputs = loop_info->get_output_ports_info(); + for (const auto& port_info : loop_outputs) { + const auto& consumer_inputs = port_info.port.expr_port->get_port_connector_ptr()->get_consumers(); + for (const auto& consumer_input : consumer_inputs) { + const auto& buffer_expr = ov::as_type_ptr(consumer_input.get_expr()); + if (!is_direct_buffer(buffer_expr, loop_expr)) continue; - // Save as output Buffer - output_buffers[consumer_expr] = { i }; + OPENVINO_ASSERT(output_buffers.count(buffer_expr) == 0, "Only one Buffer can be on node output!"); + output_buffers[buffer_expr] = port_info; } } - return output_buffers; + + return std::make_pair(input_buffers, output_buffers); } -void DefineBufferClusters::parse_loop(const LinearIR::constExprIt& expr_it) { +void DefineBufferClusters::parse_loop(const LoopManagerPtr& loop_manager, const LinearIR::constExprIt& expr_it) { const auto& expr = *expr_it; - const auto loop_end = ov::as_type_ptr(expr->get_node()); - const auto& ptr_increments = loop_end->get_ptr_increments(); - const auto& final_offsets = loop_end->get_finalization_offsets(); - const 
auto& data_sizes = loop_end->get_element_type_sizes(); + const auto& loop_end = ov::as_type_ptr(expr->get_node()); + const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id()); - // [ Expression -> Port indexes ] - const auto input_buffers = get_input_buffers(expr); - const auto output_buffers = get_output_buffers(expr); + BufferMap input_buffers, output_buffers; + std::tie(input_buffers, output_buffers) = get_direct_buffers(loop_info, expr); for (const auto& in : input_buffers) create_new_cluster(in.first); std::set visited_buffers; for (const auto& out : output_buffers) { - const auto output_buffer_expr = out.first; - const auto output_buffer_port_idx = *(out.second.cbegin()); // Output port is always one + const auto& output_buffer_expr = out.first; + const auto& output_buffer_port_info = out.second; bool has_been_added = false; for (const auto& in : input_buffers) { const auto& input_buffer_expr = in.first; + const auto& input_buffer_port_info = in.second; if (visited_buffers.count(input_buffer_expr) > 0) continue; - // If allocated sizes of buffers are unkown on compilation stage (dynamic), - // we cannot be sure that they're will be the same in runtime. 
- if (!input_buffer_expr->is_defined()|| !output_buffer_expr->is_defined()) + // Memory can be reused if reading and writing are executed proportionally: + // - output buffer can have precision with data size less than input buffer + if ((input_buffer_expr->get_data_type().size() < output_buffer_expr->get_data_type().size())) continue; - // Memory can be reused if reading and writing are executed proportionally: - // - the same reading/writing order - // - the same buffer memory sizes - if ((input_buffer_expr->get_byte_size() != output_buffer_expr->get_byte_size()) || - (input_buffer_expr->get_output_port_descriptor(0)->get_layout() != output_buffer_expr->get_input_port_descriptor(0)->get_layout())) + const auto in_path = MarkInvariantShapePath::getInvariantPortShapePath(*input_buffer_port_info.port.expr_port); + const auto out_path = MarkInvariantShapePath::getInvariantPortShapePath(*output_buffer_port_info.port.expr_port); + // - Memory can be reused if there are the same loop pointer increments (data size, final offsets, ptr increments). + // For that, loop ports with buffers should be on the same shape-path and have the same value of `is_incremented`. 
+ if (in_path != out_path || input_buffer_port_info.port.is_incremented != output_buffer_port_info.port.is_incremented) continue; - // Also memory can be reused if there are the same ShiftPtrParams (data size, final offsets, ptr increments) - const auto& input_buffer_ports = in.second; - for (const auto& input_buffer_port_idx : input_buffer_ports) { - const auto input_params = - ShiftPtrParams(data_sizes[input_buffer_port_idx], ptr_increments[input_buffer_port_idx], final_offsets[input_buffer_port_idx]); - const auto output_params = - ShiftPtrParams(data_sizes[output_buffer_port_idx], ptr_increments[output_buffer_port_idx], final_offsets[output_buffer_port_idx]); - - // If data pointer shift parameters are unknown on model compilation stage (dynamic), - // we cannot be sure that these data pointers will be proportionally shifted in runtime. - if (input_params.is_static() && output_params.is_static() && input_params == output_params) { - const auto cluster_it = find_cluster_by_expr(input_buffer_expr); - OPENVINO_ASSERT(cluster_it != m_clusters.end(), "Buffer on inputs of Loop must be already saved in clusters"); - // Add to the existing cluster - has_been_added = cluster_it->insert(output_buffer_expr).second; - OPENVINO_ASSERT(has_been_added, "Buffer has not been saved in cluster"); - // Remove input buffer because we have already use its memory - visited_buffers.insert(input_buffer_expr); - break; - } + // - Memory can be shared if Buffer has the same allocation size. + if (input_buffer_expr->is_defined() && output_buffer_expr->is_defined()) { + if (input_buffer_expr->get_allocation_size() != output_buffer_expr->get_allocation_size()) + continue; + } else { + // If allocation sizes are undefined, we can check if they have the same allocation sizes in runtime: + // - they should calculate allocation size using the common algorithm from `BufferExpression::init_allocation_size`. 
+ if (!utils::everyone_is(BufferExpression::get_type_info_static(), input_buffer_expr->get_type_info(), output_buffer_expr->get_type_info())) + continue; } - if (has_been_added) break; + + const auto cluster_it = find_cluster_by_expr(input_buffer_expr); + OPENVINO_ASSERT(cluster_it != m_clusters.end(), "Buffer on inputs of Loop must be already saved in clusters"); + // Add to the existing cluster + add_buffers_to_cluster(*cluster_it, {output_buffer_expr}); + // Remove input buffer because we have already use its memory + visited_buffers.insert(input_buffer_expr); + has_been_added = true; + break; } if (!has_been_added) { - m_clusters.push_back(BufferCluster{output_buffer_expr}); + create_new_cluster(output_buffer_expr); } } // Check Buffers inside to possible memory reusing using `window` sliding - parse_nested_loops(input_buffers, output_buffers, expr_it); + parse_nested_loops(loop_manager, input_buffers, output_buffers, expr_it); } -void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, const BufferPorts& output_buffers, - const LinearIR::constExprIt& outer_loop_end_expr_it) { +void DefineBufferClusters::parse_nested_loops(const LoopManagerPtr& loop_manager, const BufferMap& input_buffers, + const BufferMap& output_buffers, const LinearIR::constExprIt& outer_loop_end_expr_it) { if (input_buffers.empty() && output_buffers.empty()) return; - // The inner Buffer can reuse memory of the outer Buffer using `window` sliding only if: - // - The finalization offset of the latest Loop connected to the inner Buffer is equal to pointer increment of outer Buffer to emulate `window` sliding - // - This outer Buffer should have the same Buffer ID as inner to move data ptr of inner Buffer after each outer Loop iteration. - // It's needed because all Loops reset data pointers of connected Buffer after full work. - // To avoid rewriting of outer Buffer data we have to have the same Buffer ID (GPR) to proportionally shift pointers both Buffers. 
- - auto can_be_data_ptr_proportionally_shifted = [](int64_t outer_buffer_ptr_increment, int64_t outer_buffer_data_size, - int64_t inner_buffer_final_offsets, int64_t inner_buffer_data_size) { - // If data pointer shift parameters are unknown on model compilation stage (dynamic), - // we cannot be sure that these data pointers will be proportionally shifted in runtime. - if (utils::is_dynamic_value(outer_buffer_ptr_increment) || utils::is_dynamic_value(inner_buffer_final_offsets)) + auto can_be_data_ptr_proportionally_shifted = [](const LoopPortInfo& outer_port_info, const LoopPortInfo& inner_port_info) { + // Outer Buffer ptr should be shifted to emulate "window" sliding + const auto& outer_desc = outer_port_info.desc; + if (!outer_port_info.port.is_incremented || (!utils::is_dynamic_value(outer_desc.ptr_increment) && outer_desc.ptr_increment == 0)) return false; - return (outer_buffer_ptr_increment != 0) && - ((inner_buffer_data_size * inner_buffer_final_offsets * -1) == outer_buffer_ptr_increment * outer_buffer_data_size); - }; - const auto outer_loop_end = ov::as_type_ptr(outer_loop_end_expr_it->get()->get_node()); - const auto outer_loop_begin = outer_loop_end->get_loop_begin(); - const auto& outer_ptr_increments = outer_loop_end->get_ptr_increments(); - const auto& outer_data_sizes = outer_loop_end->get_element_type_sizes(); + OPENVINO_ASSERT(inner_port_info.port.expr_port && outer_port_info.port.expr_port, "Expression ports are nullptr!"); + // we can be sure that these data pointers will be proportionally shifted if they're on the same invariant shape path + return MarkInvariantShapePath::getInvariantPortShapePath(*inner_port_info.port.expr_port) == + MarkInvariantShapePath::getInvariantPortShapePath(*outer_port_info.port.expr_port); + }; + const auto outer_loop_begin = ov::as_type_ptr(outer_loop_end_expr_it->get()->get_node())->get_loop_begin(); for (auto it = std::reverse_iterator(outer_loop_end_expr_it); (*it)->get_node() != outer_loop_begin; ++it) { 
const auto& inner_expr = *it; if (const auto inner_buffer_expr = ov::as_type_ptr(inner_expr)) { @@ -185,9 +192,12 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, const auto inner_cluster_id = get_cluster_buffer_id(*inner_cluster_it); if (inner_cluster_id == SIZE_MAX) continue; - const auto final_offset = get_buffer_finalization_offset(inner_buffer_expr); + // If inner Buffer is not connected to the Loop - `window` sliding effect is not possible + LoopPortInfo final_loop_info; + if (!init_buffer_last_loop_port_info(loop_manager, inner_buffer_expr, final_loop_info)) + continue; - auto unite = [&](const BufferPorts& ports, const bool is_input) { + auto unite = [&](const BufferMap& ports, const bool is_input) { bool applied = false; for (const auto& port : ports) { const auto cluster_it = find_cluster_by_expr(port.first); @@ -196,17 +206,15 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, if (cluster_it == inner_cluster_it) continue; // Buffer from one cluster must be only defined (with known allocation_size) or dynamic (with unknown allocation_size) if (inner_buffer_expr->is_defined() != port.first->is_defined()) continue; - - bool can_be_reused = true; - for (const auto idx : port.second) { - can_be_reused = can_be_reused && - can_be_data_ptr_proportionally_shifted(outer_ptr_increments[idx], outer_data_sizes[idx], - final_offset, inner_buffer_expr->get_node()->get_element_type().size()); - } - if (!can_be_reused) - continue; - - applied = unite_nested_clusters(inner_cluster_it, *cluster_it, port.first, is_input); + // The inner Buffer can reuse memory of the outer Buffer using `window` sliding only if: + // - The finalization offset of the latest Loop connected to the inner Buffer is equal to + // pointer increment of outer Buffer to emulate `window` sliding + // - This outer Buffer should have the same Buffer ID as inner to move data ptr of inner Buffer after each outer Loop iteration. 
+ // It's needed because all Loops reset data pointers of connected Buffer after full work. + // To avoid rewriting of outer Buffer data we have to have the same Buffer ID (GPR) to proportionally shift pointers both Buffers. + if (!can_be_data_ptr_proportionally_shifted(port.second, final_loop_info)) continue; + + applied = unite_nested_clusters(loop_manager, inner_cluster_it, *cluster_it, port.first, is_input); if (applied) break; } return applied; @@ -218,101 +226,66 @@ void DefineBufferClusters::parse_nested_loops(const BufferPorts& input_buffers, } } -int64_t DefineBufferClusters::get_buffer_finalization_offset(const BufferExpressionPtr& buffer_expr) const { - auto index = [](const std::vector& loop_inputs, const PortConnectorPtr& buffer_out) { - const auto it = std::find(loop_inputs.cbegin(), loop_inputs.cend(), buffer_out); - OPENVINO_ASSERT(it != loop_inputs.cend(), "Buffer output PortConnector has not been found in target LoopEnd inputs"); - return std::distance(loop_inputs.cbegin(), it); +bool DefineBufferClusters::init_buffer_last_loop_port_info(const LoopManagerPtr& loop_manager, const BufferExpressionPtr& buffer_expr, + UnifiedLoopInfo::LoopPortInfo& port_info) { + auto get_direct_loop_for_buffer_out = [&](const BufferExpressionPtr& buffer_expr, const ExpressionPtr& consumer_expr) -> UnifiedLoopInfoPtr { + const auto inner_loops = get_connected_loops(buffer_expr, consumer_expr); + if (inner_loops.empty()) + return nullptr; + return loop_manager->get_loop_info(inner_loops.front()); }; - int64_t final_offset = 0; + + bool found = false; double last_loop_exec_order = -1 * std::numeric_limits::max(); const auto& buffer_outs = buffer_expr->get_output_port_connectors(); for (const auto& buffer_out : buffer_outs) { const auto consumers = buffer_out->get_consumers(); for (const auto& consumer : consumers) { - const auto consumer_expr = consumer.get_expr(); - const auto loop_end = ov::as_type_ptr(consumer_expr->get_node()); - if (loop_end && 
consumer_expr->get_loop_ids() == buffer_expr->get_loop_ids()) { - const auto loop_order = consumer_expr->get_exec_num(); + if (const auto& direct_loop = get_direct_loop_for_buffer_out(buffer_expr, consumer.get_expr())) { + const auto loop_order = direct_loop->get_output_ports().back().expr_port->get_expr()->get_exec_num(); if (loop_order > last_loop_exec_order) { - const auto& loop_inputs = consumer_expr->get_input_port_connectors(); - final_offset = loop_end->get_finalization_offsets()[index(loop_inputs, buffer_out)]; + OPENVINO_ASSERT(direct_loop->is_loop_port(consumer), "Consumer of Buffer from another loop must be loop port"); + port_info = direct_loop->get_loop_port_info(consumer); last_loop_exec_order = loop_order; + found = true; } } } } - return final_offset; + return found; } -bool DefineBufferClusters::unite_nested_clusters(const BufferClusters::iterator& inner_cluster_it, - BufferCluster& outer_cluster, - const BufferExpressionPtr& outer_buffer, bool is_outer_up) { +bool DefineBufferClusters::unite_nested_clusters(const LoopManagerPtr& loop_manager, const BufferClusters::iterator& inner_cluster_it, + BufferCluster& outer_cluster, const BufferExpressionPtr& outer_buffer, bool is_outer_up) { for (const auto& inner_buffer : *inner_cluster_it) { - ExpressionPtr common_loop_end_expr = nullptr; - size_t outer_idx = SIZE_MAX, inner_idx = SIZE_MAX; - const auto& up_buffer = is_outer_up ? outer_buffer : inner_buffer; - const auto& down_buffer = is_outer_up ? inner_buffer : outer_buffer; - auto& up_idx = is_outer_up ? outer_idx : inner_idx; - auto& down_idx = is_outer_up ? 
inner_idx : outer_idx; - if (are_buffer_neighbours(up_buffer, down_buffer, common_loop_end_expr, up_idx, down_idx)) { - const auto common_loop_end = ov::as_type_ptr(common_loop_end_expr->get_node()); - const auto& inner_ptr_increments = common_loop_end->get_ptr_increments(); - const auto& inner_final_offsets = common_loop_end->get_finalization_offsets(); - const auto& inner_data_sizes = common_loop_end->get_element_type_sizes(); - if (SetBufferRegGroup::can_be_in_one_group({ inner_data_sizes[up_idx], inner_ptr_increments[up_idx], inner_final_offsets[up_idx] }, - { inner_data_sizes[down_idx], inner_ptr_increments[down_idx], inner_final_offsets[down_idx] })) { - for (const auto& inner_buffer : *inner_cluster_it) - inner_buffer->set_reg_group(outer_buffer->get_reg_group()); - - outer_cluster.insert(inner_cluster_it->cbegin(), inner_cluster_it->cend()); - m_clusters.erase(inner_cluster_it); - return true; + const auto& upper_buffer = is_outer_up ? outer_buffer : inner_buffer; + const auto& lower_buffer = is_outer_up ? 
inner_buffer : outer_buffer; + + const auto& lower_buffer_source = lower_buffer->get_input_port_connector(0)->get_source(); + const auto& upper_buffer_consumers = upper_buffer->get_output_port_connector(0)->get_consumers(); + for (const auto& upper_buffer_consumer : upper_buffer_consumers) { + const auto& connected_loops = get_connected_loops(upper_buffer, upper_buffer_consumer.get_expr()); + for (const auto& loop_id : connected_loops) { + const auto& common_loop_info = loop_manager->get_loop_info(loop_id); + if (!common_loop_info->is_loop_port(lower_buffer_source) || !common_loop_info->is_loop_port(upper_buffer_consumer)) + continue; + + const auto upper_port_desc = common_loop_info->get_loop_port_info(upper_buffer_consumer); + const auto lower_port_desc = common_loop_info->get_loop_port_info(lower_buffer_source); + if (SetBufferRegGroup::can_be_in_one_reg_group(upper_port_desc, lower_port_desc)) { + for (const auto& inner_buffer : *inner_cluster_it) + inner_buffer->set_reg_group(outer_buffer->get_reg_group()); + + add_buffers_to_cluster(outer_cluster, *inner_cluster_it); + m_clusters.erase(inner_cluster_it); + return true; + } } } } return false; } -bool DefineBufferClusters::are_buffer_neighbours(const BufferExpressionPtr& up, const BufferExpressionPtr& down, ExpressionPtr& loop, - size_t& up_idx, size_t& down_idx) { - auto find_input = [&down](const PortConnectorPtr& in) { - return in->get_source().get_expr() == down; - }; - auto find_output = [&down](const PortConnectorPtr& in) { - const auto consumers = in->get_consumers(); - return std::any_of(consumers.cbegin(), consumers.cend(), - [&down](const ExpressionPort& port) { return port.get_expr() == down; }); - }; - auto find = [&](const std::vector::const_iterator& begin, - const std::vector::const_iterator& end, - const std::vector::const_iterator& orig_begin, - const ExpressionPort& loop_port, - bool is_input) -> bool { - const auto in_buffer_it = is_input ? 
std::find_if(begin, end, find_input) - : std::find_if(begin, end, find_output); - if (in_buffer_it != end) { - up_idx = loop_port.get_index(); - down_idx = std::distance(orig_begin, in_buffer_it); - loop = loop_port.get_expr(); - return true; - } - return false; - }; - for (const auto& out : up->get_output_port_connectors()) { - for (const auto& buffer_consumer : out->get_consumers()) { - const auto buffer_consumer_expr = buffer_consumer.get_expr(); - const auto loop_end = ov::as_type_ptr(buffer_consumer_expr->get_node()); - if (!loop_end) - continue; - const auto& loop_inputs = buffer_consumer_expr->get_input_port_connectors(); - if (find(loop_inputs.cbegin(), loop_inputs.cbegin() + loop_end->get_input_num(), loop_inputs.cbegin(), buffer_consumer, true)) return true; - if (find(loop_inputs.cbegin() + loop_end->get_input_num(), loop_inputs.cend(), loop_inputs.cbegin(), buffer_consumer, false)) return true; - } - } - return false; -} - void DefineBufferClusters::parse_memory_access_op(const ExpressionPtr& expr) { const auto ma = std::dynamic_pointer_cast(expr->get_node()); // TODO: Some full MemoryAccess ops can have inplace inputs and outputs in general. 
@@ -340,7 +313,7 @@ bool DefineBufferClusters::run(lowered::LinearIR& linear_ir, lowered::LinearIR:: const auto& expr = *expr_it; const auto op = expr->get_node(); if (ov::is_type(op)) { - parse_loop(expr_it); + parse_loop(linear_ir.get_loop_manager(), expr_it); continue; } diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index aa7d0ab042e1a4..69d336094f1a14 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -16,46 +16,10 @@ namespace lowered { namespace pass { namespace { -inline void init_is_incremented(LoopPort& port, size_t loop_id) { +inline void init_is_incremented(LoopPort& port) { const auto& expr = port.expr_port->get_expr(); - const auto& expr_loops = expr->get_loop_ids(); if (!std::dynamic_pointer_cast(expr->get_node())) { port.is_incremented = false; - } else if (expr_loops.back() != loop_id) { - // Note: LoopPort connected to Buffer between two loops should not be incremented in the outermost loop - // Consider the example below: - // Store; Loop ids [0,1,2,3] - // Buffer; Loop ids [0,1] - // Load; Loop ids [0,1,4,5] - // Store is output port of Loop-1, but it should be incremented only in Loop-2 and Loop-3. Similar with Load. 
- auto is_ignored = [=](const ExpressionPtr& target_expr) { - if (ov::is_type(target_expr)) { - const auto& target_loops = target_expr->get_loop_ids(); - const auto i_max = std::min(expr_loops.size(), target_loops.size()); - for (size_t i = 0; i < i_max && expr_loops[i] == target_loops[i]; i++) { - if (target_loops[i] == loop_id) - return true; - } - } - return false; - }; - if (port.expr_port->get_type() == ExpressionPort::Type::Output) { - const auto& out_connector = expr->get_output_port_connector(port.expr_port->get_index()); - for (const auto& consumer : out_connector->get_consumers()) { - if (is_ignored(consumer.get_expr())) { - port.is_incremented = false; - return; - } - } - } else if (port.expr_port->get_type() == ExpressionPort::Type::Input) { - const auto& in_connector = expr->get_input_port_connector(port.expr_port->get_index()); - if (is_ignored(in_connector->get_source().get_expr())) { - port.is_incremented = false; - return; - } - } else { - OPENVINO_THROW("Unexpected LoopPort type"); - } } } @@ -71,11 +35,11 @@ inline int64_t get_data_size(const LoopPort& loop_port) { } } // namespace -void InitLoops::update_compile_parameters(const UnifiedLoopInfoPtr& loop_info, size_t loop_id) { +void InitLoops::update_compile_parameters(const UnifiedLoopInfoPtr& loop_info) { OPENVINO_ASSERT(loop_info != nullptr, "UnifiedLoopInfo is nullptr, nothing to update"); loop_info->iterate_through_infos( - [loop_id](LoopPort& loop_port, UnifiedLoopInfo::LoopPortDesc& ptr_shifts_params) { - init_is_incremented(loop_port, loop_id); + [](LoopPort& loop_port, UnifiedLoopInfo::LoopPortDesc& ptr_shifts_params) { + init_is_incremented(loop_port); ptr_shifts_params.data_size = get_data_size(loop_port); }); } @@ -85,12 +49,10 @@ bool InitLoops::run(LinearIR& linear_ir) { if (linear_ir.empty()) return false; - const auto& loop_manager = linear_ir.get_loop_manager(); - const auto& loops = loop_manager->get_map(); + const auto& loops = linear_ir.get_loop_manager()->get_map(); for 
(const auto& loop : loops) { - const auto& loop_id = loop.first; const auto& loop_info = ov::as_type_ptr(loop.second); - update_compile_parameters(loop_info, loop_id); + update_compile_parameters(loop_info); ov::snippets::utils::update_runtime_parameters(loop_info); } diff --git a/src/common/snippets/src/lowered/pass/mark_invariant_shape_path.cpp b/src/common/snippets/src/lowered/pass/mark_invariant_shape_path.cpp new file mode 100644 index 00000000000000..b32056d4e32a57 --- /dev/null +++ b/src/common/snippets/src/lowered/pass/mark_invariant_shape_path.cpp @@ -0,0 +1,128 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "snippets/lowered/pass/mark_invariant_shape_path.hpp" + +#include "snippets/lowered/expressions/buffer_expression.hpp" +#include "snippets/op/memory_access.hpp" +#include "snippets/snippets_isa.hpp" +#include "snippets/utils/utils.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +namespace { + +// Specific value to mark ports which doesn't affect output shape of broadcastable ops. +// For example, ops with output scalar shape or Horizon ops. 
+static const size_t NOT_AFFECTING_PATH = SIZE_MAX; + +static bool is_shape_broadcastable_op(const ExpressionPtr& expr) { + return expr->get_node()->get_autob() != ov::op::AutoBroadcastType::NONE; +} + +static bool is_not_affecting_op(const ExpressionPtr& expr) { + const auto& node = expr->get_node(); + return ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node); +} + +static bool is_affecting_op(const ExpressionPtr& expr) { + const auto& node = expr->get_node(); + return ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node); +} +} // namespace + +size_t MarkInvariantShapePath::getInvariantPortShapePath(const ExpressionPort& port) { + auto& rt = get_rt_info(port); + const auto rinfo = rt.find("InvariantShapePath"); + OPENVINO_ASSERT(rinfo != rt.end(), "Invariant path for this expression port has not been marked!"); + return rinfo->second.as(); +} + +void MarkInvariantShapePath::SetInvariantPortShapePath(const ExpressionPort& port, size_t value) { + OPENVINO_ASSERT(port.get_type() == ExpressionPort::Output, "SetInvariantPortShapePath can be used only for output port"); + auto& rt = get_rt_info(port); + rt["InvariantShapePath"] = value; +} + +ov::RTMap& MarkInvariantShapePath::get_rt_info(const ExpressionPort& port) { + const auto& source_port = port.get_type() == ExpressionPort::Input ? 
port.get_port_connector_ptr()->get_source() : port; + const auto& node = source_port.get_expr()->get_node(); + const auto port_idx = source_port.get_index(); + OPENVINO_ASSERT(port_idx < node->get_output_size(), "Node has incompatible port count with the expression"); + return node->output(port_idx).get_rt_info(); +} + +bool MarkInvariantShapePath::run(lowered::LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::MarkInvariantShapePath"); + + bool modified = false; + + // Shape -> color + std::map colored_shapes; + + size_t color_path = 0; + + auto merge_paths = [&color_path](size_t lhs, size_t rhs) { + if (lhs == rhs || rhs == NOT_AFFECTING_PATH) return lhs; + if (lhs == NOT_AFFECTING_PATH) return rhs; + return ++color_path; + }; + + for (auto expr_it = begin; expr_it != end; ++expr_it) { + const auto& expr = *expr_it; + if (ov::is_type(expr->get_node())) + continue; + + for (size_t out_idx = 0; out_idx < expr->get_output_count(); ++out_idx) { + const auto& out_shape = expr->get_output_port_descriptor(out_idx)->get_shape(); + size_t current_color_path; + if (colored_shapes.count(out_shape)) { + current_color_path = colored_shapes.at(out_shape); + } else if (!utils::is_dynamic_vdims(out_shape) && ov::shape_size(out_shape) == 1) { + current_color_path = NOT_AFFECTING_PATH; + } else { + if (is_affecting_op(expr)) { + current_color_path = ++color_path; + } else if (is_not_affecting_op(expr)) { + current_color_path = NOT_AFFECTING_PATH; + } else if (is_shape_broadcastable_op(expr)) { + current_color_path = NOT_AFFECTING_PATH; + for (size_t in_idx = 0; in_idx < expr->get_input_count(); ++in_idx) { + const auto input_path = getInvariantPortShapePath(expr->get_input_port(in_idx)); + current_color_path = merge_paths(current_color_path, input_path); + } + } else { + current_color_path = expr->get_input_count() > 0 ? 
getInvariantPortShapePath(expr->get_input_port(0)) + : ++color_path; + } + + if (!utils::is_dynamic_vdims(out_shape)) + colored_shapes[out_shape] = current_color_path; + } + + SetInvariantPortShapePath(expr->get_output_port(out_idx), current_color_path); + modified = true; + } + } + + return modified; +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp b/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp index 9bdb5e8ef3a9dc..46248b9c277818 100644 --- a/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp +++ b/src/common/snippets/src/lowered/pass/set_buffer_reg_group.cpp @@ -4,8 +4,10 @@ #include "snippets/lowered/pass/set_buffer_reg_group.hpp" +#include "snippets/lowered/pass/mark_invariant_shape_path.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/snippets_isa.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/lowered/expressions/buffer_expression.hpp" #include "snippets/itt.hpp" namespace ov { @@ -19,54 +21,44 @@ inline size_t index(size_t col_num, size_t row, size_t col) { } } // namespace -bool operator==(const SetBufferRegGroup::ShiftPtrParams& lhs, const SetBufferRegGroup::ShiftPtrParams& rhs) { - if (&lhs == &rhs) - return true; - return lhs.ptr_increment == rhs.ptr_increment && lhs.finalization_offset == rhs.finalization_offset && lhs.data_size == rhs.data_size; -} -bool operator!=(const SetBufferRegGroup::ShiftPtrParams& lhs, const SetBufferRegGroup::ShiftPtrParams& rhs) { - return !(rhs == lhs); -} - size_t SetBufferRegGroup::get_buffer_idx(const BufferExpressionPtr& target, const BufferPool& pool) { const auto iter = std::find(pool.cbegin(), pool.cend(), target); OPENVINO_ASSERT(iter != pool.cend(), "Buffer wasn't find in Buffer system of Subgraph"); return std::distance(pool.cbegin(), iter); } -bool SetBufferRegGroup::can_be_in_one_group(const ShiftPtrParams& lhs, const 
ShiftPtrParams& rhs) { - // If data pointer shift parameters are unknown on model compilation stage (dynamic), - // we cannot be sure that these data pointers will be proportionally shifted. - // Then we force `false` value here to set unique registers for these buffers - const auto are_static = lhs.is_static() && rhs.is_static(); - const auto equal_ptr_params_shifting = lhs.ptr_increment == rhs.ptr_increment && lhs.finalization_offset == rhs.finalization_offset; - const auto equal_element_type_sizes = lhs.data_size == rhs.data_size; - return are_static && equal_ptr_params_shifting && (equal_element_type_sizes || (lhs.ptr_increment == 0 && lhs.finalization_offset == 0)); +bool SetBufferRegGroup::can_be_in_one_reg_group(const UnifiedLoopInfo::LoopPortInfo& lhs_info, + const UnifiedLoopInfo::LoopPortInfo& rhs_info) { + const auto equal_element_type_sizes = lhs_info.desc.data_size == rhs_info.desc.data_size; + OPENVINO_ASSERT(lhs_info.port.expr_port && rhs_info.port.expr_port, "Expression ports are nullptr!"); + const auto equal_invariant_shape_paths = + MarkInvariantShapePath::getInvariantPortShapePath(*lhs_info.port.expr_port) == + MarkInvariantShapePath::getInvariantPortShapePath(*rhs_info.port.expr_port); + const auto equal_is_incremented = lhs_info.port.is_incremented == rhs_info.port.is_incremented; + return equal_invariant_shape_paths && equal_is_incremented && + (equal_element_type_sizes || !lhs_info.port.is_incremented || (lhs_info.desc.ptr_increment == 0 && lhs_info.desc.finalization_offset == 0)); } -bool SetBufferRegGroup::are_adjacent(const std::pair& lhs, - const std::pair& rhs) { +bool SetBufferRegGroup::are_adjacent(const BufferMap::value_type& lhs, const BufferMap::value_type& rhs) { const auto& lhs_ids = lhs.first->get_loop_ids(); const auto& rhs_ids = rhs.first->get_loop_ids(); const auto equal_loop_ids = lhs_ids == rhs_ids; if (equal_loop_ids) { // Buffers are connected to the same Loop and have the same outer Loops - return 
!can_be_in_one_group(lhs.second, rhs.second); + return !can_be_in_one_reg_group(lhs.second, rhs.second); } else { // Buffers are connected to the same Loop, but one of Buffers - inside this Loop, another - outside - // Buffers are adjacent if outer Buffer has not zero data shift params + // Buffers are adjacent if outer Buffer has non-zero data shift params if (lhs_ids.size() == rhs_ids.size()) // If the count of outer Loops are equal, it means that outer loops are already different return true; const auto& outer_buffer = lhs_ids.size() < rhs_ids.size() ? lhs : rhs; const auto count_outer_loops = std::min(lhs_ids.size(), rhs_ids.size()); const auto are_outer_loops_the_same = lhs_ids.size() != rhs_ids.size() && std::equal(rhs_ids.cbegin(), rhs_ids.cbegin() + count_outer_loops, lhs_ids.cbegin()); - const auto outer_buffer_has_zero_shifts = outer_buffer.second.ptr_increment == 0 && outer_buffer.second.finalization_offset == 0; + const auto outer_buffer_has_zero_shifts = outer_buffer.second.desc.ptr_increment == 0 && outer_buffer.second.desc.finalization_offset == 0; return !(are_outer_loops_the_same && outer_buffer_has_zero_shifts); } } -void SetBufferRegGroup::update_adj_matrix(const std::pair& lhs, - const std::pair& rhs, - const BufferPool& buffers, +void SetBufferRegGroup::update_adj_matrix(const BufferMap::value_type& lhs, const BufferMap::value_type& rhs, const BufferPool& buffers, std::vector& adj) { const auto size = buffers.size(); const auto lhs_idx = get_buffer_idx(lhs.first, buffers); @@ -80,7 +72,8 @@ void SetBufferRegGroup::update_adj_matrix(const std::pair SetBufferRegGroup::create_adjacency_matrix(LinearIR::constExprIt begin, LinearIR::constExprIt end, const BufferPool& pool) { +std::vector SetBufferRegGroup::create_adjacency_matrix(const LoopManagerPtr& loop_manager, LinearIR::constExprIt begin, LinearIR::constExprIt end, + const BufferPool& pool) { // The sync point to check for adjacency is Loop because only in Loop we increment pointers. 
// So if some Buffers in the one Loop have conflict (cannot be inplace: the different ptr increment and data sizes) // they are called as adjacent @@ -91,10 +84,12 @@ std::vector SetBufferRegGroup::create_adjacency_matrix(LinearIR::constExpr for (auto expr_it = begin; expr_it != end; expr_it++) { const auto &expr = *expr_it; - if (!ov::is_type(expr->get_node())) + const auto& loop_end = ov::as_type_ptr(expr->get_node()); + if (!loop_end) continue; - const auto buffer_loop_neighbours = get_buffer_loop_neighbours(expr); + const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id()); + const auto buffer_loop_neighbours = get_buffer_loop_neighbours(loop_info); const auto buffers_loop_inside = get_buffer_loop_inside(expr_it); for (auto buffer_it = buffer_loop_neighbours.cbegin(); buffer_it != buffer_loop_neighbours.cend(); ++buffer_it) { // If Buffers, that are connected to the same Loop, have not proportionally ptr shift params for this Loop - these Buffers are adjacent @@ -113,47 +108,33 @@ std::vector SetBufferRegGroup::create_adjacency_matrix(LinearIR::constExpr return adj; } -SetBufferRegGroup::BufferMap SetBufferRegGroup::get_buffer_loop_neighbours(const ExpressionPtr& loop_end_expr) { - const auto& loop_end = ov::as_type_ptr(loop_end_expr->get_node()); - const auto input_count = loop_end->get_input_num(); - const auto output_count = loop_end->get_output_num(); - - const auto& ptr_increments = loop_end->get_ptr_increments(); - const auto& finalization_offsets = loop_end->get_finalization_offsets(); - const auto& data_sizes = loop_end->get_element_type_sizes(); - +SetBufferRegGroup::BufferMap SetBufferRegGroup::get_buffer_loop_neighbours(const UnifiedLoopInfoPtr& loop_info) { BufferMap buffer_neighbours; - for (size_t i = 0; i < input_count; ++i) { - const auto& parent_output = loop_end_expr->get_input_port_connector(i)->get_source().get_expr(); + + const auto& loop_inputs = loop_info->get_input_ports_info(); + for (const auto& port_info : loop_inputs) { 
+ const auto& parent_output = port_info.port.expr_port->get_port_connector_ptr()->get_source().get_expr(); if (const auto buffer_expr = ov::as_type_ptr(parent_output)) { if (buffer_neighbours.count(buffer_expr) > 0) { - OPENVINO_ASSERT(buffer_neighbours[buffer_expr].ptr_increment == ptr_increments[i] && - buffer_neighbours[buffer_expr].finalization_offset == finalization_offsets[i], + const auto& port_desc = port_info.desc; + OPENVINO_ASSERT(buffer_neighbours[buffer_expr].desc == port_desc, "Invalid data pointer shifts: If Buffer has several consumers, this consumers must have the same shifts or zero"); continue; } - buffer_neighbours[buffer_expr] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; + buffer_neighbours[buffer_expr] = port_info; } } - for (size_t i = input_count; i < input_count + output_count; ++i) { - // The consumers of the corresponding Store ops - const auto consumer_inputs = loop_end_expr->get_input_port_connector(i)->get_consumers(); - size_t buffer_count = 0; - size_t loop_count = 0; + + const auto& loop_outputs = loop_info->get_output_ports_info(); + for (const auto& port_info : loop_outputs) { + const auto& consumer_inputs = port_info.port.expr_port->get_port_connector_ptr()->get_consumers(); for (const auto& consumer_input : consumer_inputs) { const auto& child_expr = consumer_input.get_expr(); - if (const auto buffer_expr = ov::as_type_ptr(child_expr)) { - buffer_neighbours[buffer_expr] = { data_sizes[i], ptr_increments[i], finalization_offsets[i] }; - buffer_count++; - } else if (ov::is_type(child_expr->get_node())) { - loop_count++; - } - } - if (buffer_count > 0) { - OPENVINO_ASSERT((buffer_count == 1) && (buffer_count + loop_count == consumer_inputs.size()), - "Loop output must have not more than 1 Buffer"); + if (const auto buffer_expr = ov::as_type_ptr(child_expr)) + buffer_neighbours[buffer_expr] = port_info; } } + return buffer_neighbours; } @@ -164,9 +145,9 @@ SetBufferRegGroup::BufferMap 
SetBufferRegGroup::get_buffer_loop_inside(const Lin for (auto it = std::reverse_iterator(loop_end_it); (*it)->get_node() != loop_begin; ++it) { const auto& inner_expr = *it; if (const auto buffer_expr = ov::as_type_ptr(inner_expr)) { - // Set default zero values since it's not used for adjacency definition in case with Buffers in Loop + // Set default value (zeroes) since it's not used for adjacency definition in case with Buffers in Loop if (inner_buffers.count(buffer_expr) == 0) - inner_buffers[buffer_expr] = { 0, 0, 0 }; + inner_buffers[buffer_expr] = UnifiedLoopInfo::LoopPortInfo(); } } return inner_buffers; @@ -219,6 +200,7 @@ auto SetBufferRegGroup::coloring(BufferPool& buffers, std::vector& adj) -> bool SetBufferRegGroup::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SetBufferRegGroup") + // Identify Buffers using Graph coloring algorithm. BufferPool buffer_pool = linear_ir.get_buffers(); // For the better coloring Buffers should be stored in the order of execution numbers @@ -226,7 +208,7 @@ bool SetBufferRegGroup::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt [](const BufferExpressionPtr& lhs, const BufferExpressionPtr& rhs) { return lhs->get_exec_num() < rhs->get_exec_num(); }); // Creation of Adj matrix - auto adj = create_adjacency_matrix(begin, end, buffer_pool); + auto adj = create_adjacency_matrix(linear_ir.get_loop_manager(), begin, end, buffer_pool); // Graph coloring algorithm const auto color_groups = coloring(buffer_pool, adj); diff --git a/src/common/snippets/src/lowered/pass/validate_unified_loops.cpp b/src/common/snippets/src/lowered/pass/validate_unified_loops.cpp index bdfb8896405847..ec43f02d28792f 100644 --- a/src/common/snippets/src/lowered/pass/validate_unified_loops.cpp +++ b/src/common/snippets/src/lowered/pass/validate_unified_loops.cpp @@ -7,6 +7,7 @@ #include "snippets/itt.hpp" #include 
"snippets/lowered/linear_ir.hpp" #include "snippets/lowered/loop_manager.hpp" +#include "snippets/utils/loop_utils.hpp" #include "snippets/utils/utils.hpp" namespace ov { @@ -14,14 +15,7 @@ namespace snippets { namespace lowered { namespace pass { -bool ValidateUnifiedLoops::run(LinearIR& linear_ir) { - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ValidateUnifiedLoops") - if (linear_ir.empty()) - return false; - - const auto& loop_manager = linear_ir.get_loop_manager(); - const auto& loops = loop_manager->get_map(); - +void ValidateUnifiedLoops::validate_loop_infos(const LoopManagerPtr& loop_manager) { // Already validated vectors of Loop IDs std::set> validated_nested_loops; auto is_already_verified = [&validated_nested_loops](const std::vector& ids) { @@ -66,10 +60,9 @@ bool ValidateUnifiedLoops::run(LinearIR& linear_ir) { validated_nested_loops.insert(loop_ids); }; - for (const auto& pair : loops) { + for (const auto& pair : loop_manager->get_map()) { const auto& loop_info = ov::as_type_ptr(pair.second); - OPENVINO_ASSERT(loop_info, - "ValidateUnifiedLoops expects only UnifiedLoopInfo in LoopManager"); + OPENVINO_ASSERT(loop_info, "ValidateUnifiedLoops expects only UnifiedLoopInfo in LoopManager"); loop_info->iterate_through_ports(validate_loop_port); // Validate that iteration dimnsion is broadcastable @@ -88,6 +81,46 @@ bool ValidateUnifiedLoops::run(LinearIR& linear_ir) { OPENVINO_ASSERT(unique_dimensions.size() <= 1, "Loop ports have incompatible dimensions, by which the loop iterates"); } +} + +void ValidateUnifiedLoops::validate_loop_port_presence(const LinearIR& linear_ir) { + auto validate_loop_port = [](const ExpressionPort& expr_port, const LoopInfoPtr& loop_info, size_t loop_id) { + if (utils::should_be_loop_port(expr_port, loop_id)) { + OPENVINO_ASSERT(loop_info->is_loop_port(expr_port), + "Expression port with idx ", expr_port.get_index(), " with node ", + expr_port.get_expr()->get_node()->get_friendly_name(), " is not 
Loop port but should be!"); + } else { + OPENVINO_ASSERT(!loop_info->is_loop_port(expr_port), + "Expression port with idx ", expr_port.get_index(), " with node ", + expr_port.get_expr()->get_node()->get_friendly_name(), " is Loop port but should not be!"); + } + }; + + const auto& loop_manager = linear_ir.get_loop_manager(); + for (const auto& expr : linear_ir) { + const auto& op = expr->get_node(); + if (ov::is_type(op)) + continue; + + for (const auto& loop_id : expr->get_loop_ids()) { + const auto& loop_info = loop_manager->get_loop_info(loop_id); + + for (size_t i = 0; i < expr->get_input_count(); ++i) + validate_loop_port(expr->get_input_port(i), loop_info, loop_id); + + for (size_t i = 0; i < expr->get_output_count(); ++i) + validate_loop_port(expr->get_output_port(i), loop_info, loop_id); + } + } +} + +bool ValidateUnifiedLoops::run(LinearIR& linear_ir) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ValidateUnifiedLoops") + if (linear_ir.empty()) + return false; + + validate_loop_infos(linear_ir.get_loop_manager()); + validate_loop_port_presence(linear_ir); return true; } diff --git a/src/common/snippets/src/pass/mha_tokenization.cpp b/src/common/snippets/src/pass/mha_tokenization.cpp index c42eb08b82bd4a..beb465ab3a3fbe 100644 --- a/src/common/snippets/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/src/pass/mha_tokenization.cpp @@ -268,16 +268,11 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken const auto pattern_rank = matmul0->get_output_partial_shape(0).size(); - const auto ops_count_before_softmax = ordered_ops.size(); auto interm_op = matmul0->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); // Add supported operations which are between MatMul0 and Softmax to ordered_ops if (!update_intermediate_supported_ops(interm_op, ordered_ops, hidden_virtual_ports_count, potential_body_params_count)) return false; - // If before Softmax there is Eltwise ops, there will 
be one more Buffer - if (ops_count_before_softmax != ordered_ops.size() && interm_op->get_output_partial_shape(0).rbegin()->is_dynamic()) - uniqie_buffer_reg_group_count++; - std::shared_ptr reshape0 = nullptr; if (!tokenize_reshape_around_softmax(interm_op, reshape0, ordered_ops)) return false; @@ -295,10 +290,6 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken if (axis != rank.get_length() - 1 || interm_op->get_output_target_inputs(0).size() != 1) return false; - // Softmax need one buffer at least - if (interm_op->get_output_partial_shape(0).rbegin()->is_dynamic()) - uniqie_buffer_reg_group_count++; - ordered_ops.push_back(interm_op); interm_op = interm_op->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); @@ -333,7 +324,7 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken // The Loop will have one Buffer with the same shape both on input and output. // Need to check for precision to get if we need one more register for Buffer const auto matmul0_prc = op::Brgemm::get_output_type(matmul0->get_input_element_type(0), matmul0->get_input_element_type(1)); - if (matmul1->get_input_element_type(0).size() != matmul0_prc.size() || matmul1->get_input_partial_shape(0).is_dynamic()) { + if (matmul1->get_input_element_type(0).size() != matmul0_prc.size()) { uniqie_buffer_reg_group_count++; } diff --git a/src/common/snippets/src/utils/loop_utils.cpp b/src/common/snippets/src/utils/loop_utils.cpp index dabd129fce451d..3d6b274c7613a8 100644 --- a/src/common/snippets/src/utils/loop_utils.cpp +++ b/src/common/snippets/src/utils/loop_utils.cpp @@ -82,6 +82,15 @@ void update_runtime_parameters(const UnifiedLoopInfoPtr& loop_info) { update_data_pointer_shifts(loop_info); } +bool should_be_loop_port(const ov::snippets::lowered::ExpressionPort& port, size_t loop_id) { + const auto& connected_ports = port.get_connected_ports(); + return std::any_of(connected_ports.cbegin(), connected_ports.cend(), + 
[&](const ExpressionPort& connected_port) { + const auto& loops = connected_port.get_expr()->get_loop_ids(); + return std::find(loops.cbegin(), loops.cend(), loop_id) == loops.cend(); + }); +} + } // namespace utils } // namespace snippets } // namespace ov \ No newline at end of file diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp index c5932ed690d670..040982feb4e0ec 100644 --- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/tests/src/pass/mha_tokenization.cpp @@ -169,6 +169,10 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM) { } TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM_withMul) { +#if defined(WIN32) && !defined(NDEBUG) + test_skipped = true; + GTEST_SKIP() << "Skipping on Windows in Debug mode due to Issue 155258."; +#endif const auto& f = MHASplitMFunction(std::vector{{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), std::vector{{2, 64, 12, 64}, {128, 12, 1, 64}, {12, 2, 64, 128}, {1, 128, 12, 64}, {128, 12, 64}}, @@ -191,6 +195,10 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM) { } TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM_withMul) { +#if defined(WIN32) && !defined(NDEBUG) + test_skipped = true; + GTEST_SKIP() << "Skipping on Windows in Debug mode due to Issue 155258."; +#endif const auto& f = MHASplitMFunction(std::vector{{1, 384, 16, 64}, {1, 384, 16, 64}, {1, 1, 1, 384}, {1, 384, 16, 64}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), std::vector{{1, 6, 64, 16, 64}, {1, 384, 16, 1, 64}, {1, 1, 1, 1, 384}, {1, 1, 384, 16, 64}, {1, 384, 16, 64}}, diff --git a/src/common/transformations/include/ov_ops/dynamic_quantize.hpp b/src/common/transformations/include/ov_ops/dynamic_quantize.hpp index 69c148305fb94f..2eb79322b84e28 100644 --- 
a/src/common/transformations/include/ov_ops/dynamic_quantize.hpp +++ b/src/common/transformations/include/ov_ops/dynamic_quantize.hpp @@ -14,29 +14,75 @@ namespace internal { /// \brief Operator performing Dynamic Quantize class TRANSFORMATIONS_API DynamicQuantize : public ov::op::Op { public: - OPENVINO_OP("DynamicQuantize", "gpu_opset"); + OPENVINO_OP("DynamicQuantize", "ie_internal_opset"); + + /** + * @brief Configuration for the type of quantization applied to the data: + * - Symmetric: Quantization where the zero point is fixed at zero, and the range is symmetric around zero. + * - Asymmetric: Quantization where the zero point is not fixed at zero. + */ + enum class QuantizationType { Symmetric, Asymmetric }; + + /** + * @brief Configuration for how Activations, Scales and Zero Points will be stored in output buffers: + * - Planar: Activations, Scales, and Zero Points are stored in independent buffers. + * - InterleavedScalesZP: Activations are stored in an independent buffer, while Scales and Zero Points (if any) are + * combined in a separate buffer. + */ + enum class OutputStorageType { Planar, InterleavedScalesZP, /* InterleavedActivationsScalesZP */ }; + + /// \brief Structure that specifies attributes for interpolation + struct Attributes { + QuantizationType quantization_type = QuantizationType::Symmetric; + element::Type quantization_dt = element::undefined; + element::Type scale_dt = element::undefined; + element::Type zp_dt = element::undefined; + + std::vector group_sizes = {}; + std::vector scales_zp_output_order = {}; + OutputStorageType output_storage_type = OutputStorageType::Planar; + }; DynamicQuantize() = default; /// \brief Constructs an DynamicQuantize operation. 
/// /// \param data Input tensor with data - /// \param group_sizes Group sizes for dynamic quantization - /// \param dt_scale Data type for scale output - DynamicQuantize(const Output& data, std::vector group_sizes, element::Type dt_scale); + /// \param config Dynamic quantization configuration + DynamicQuantize(const Output& data, const Attributes& attrs); void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + const Attributes& get_attrs() const { + return m_attrs; + } + + void set_attrs(Attributes attrs) { + m_attrs = std::move(attrs); + } + const std::vector& get_group_sizes() const { - return m_group_sizes; - }; + return m_attrs.group_sizes; + } + + QuantizationType get_quantization_type() const { + return m_attrs.quantization_type; + } + + OutputStorageType get_output_storage_type() const { + return m_attrs.output_storage_type; + } + + const std::vector& get_scales_zp_output_order() const { + return m_attrs.scales_zp_output_order; + } + static std::vector shape_infer(const DynamicQuantize* op, - const std::vector& input_shapes, - const std::vector& group_sizes); + const std::vector& input_shapes); -private: - std::vector m_group_sizes; - element::Type m_dt_scale; +protected: + Attributes m_attrs; }; } // namespace internal diff --git a/src/common/transformations/include/ov_ops/lora_subgraph.hpp b/src/common/transformations/include/ov_ops/lora_subgraph.hpp new file mode 100644 index 00000000000000..75aaa16a5d280e --- /dev/null +++ b/src/common/transformations/include/ov_ops/lora_subgraph.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "openvino/op/util/sub_graph_base.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace op { +namespace internal { +/** + * @interface LoraSubgraph + * @brief LoraSubgraph operation, which is used for LoRA subgraphs 
fusion. + * It always has only 1 output, and the following inputs, whose order is fixed: + * 1. main_flow_input: input from original model. + * 2. LoRA_input: input to which the Low-Rank adaptation is applied. + * The adapted input is combined with `main_flow_input`. + * 3. LoRA_matrices: 3 Low-Rank adaptation matrices applied to `LoRA_input`. + * The fused subgraph can be optimized in runtime based on LoRA semantic. + * For instance, `main_flow_input` can be fast-forwarded to output in case of empty `LoRA_matrices`. + */ +class TRANSFORMATIONS_API LoraSubgraph : public ov::op::util::SubGraphOp { +public: + OPENVINO_OP("LoraSubgraph", "ie_internal_opset", ov::op::util::SubGraphOp); + + LoraSubgraph() = default; + LoraSubgraph(const OutputVector& args, const std::shared_ptr& body); + + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; +}; + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp new file mode 100644 index 00000000000000..8422ad95f262c6 --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/lora_subgraph_fusion.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API LoraSubgraphFusion; + +} // namespace pass +} // namespace ov + +class ov::pass::LoraSubgraphFusion : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("LoraSubgraphFusion", "0"); + LoraSubgraphFusion(); +}; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_slicescatter.hpp 
b/src/common/transformations/include/transformations/op_conversions/convert_slicescatter.hpp new file mode 100644 index 00000000000000..020b4e236fcac5 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_slicescatter.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertSliceScatter; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertSliceScatter : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertSliceScatter", "0"); + ConvertSliceScatter(); +}; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp new file mode 100644 index 00000000000000..c2ebfbc0f3138b --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { +/** + * @ingroup ov_transformation_common_api + * @brief Converts Squeeze v15 to Squeeze v0. 
+ */ +class TRANSFORMATIONS_API ConvertSqueeze15ToSqueeze0 : public MatcherPass { +public: + OPENVINO_RTTI("ConvertSqueeze15ToSqueeze0", "0"); + ConvertSqueeze15ToSqueeze0(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/print_model.hpp b/src/common/transformations/include/transformations/utils/print_model.hpp similarity index 100% rename from src/plugins/intel_cpu/src/utils/print_model.hpp rename to src/common/transformations/include/transformations/utils/print_model.hpp diff --git a/src/common/transformations/include/transformations/utils/utils.hpp b/src/common/transformations/include/transformations/utils/utils.hpp index fed812ecc864ca..6bcc50e29b3733 100644 --- a/src/common/transformations/include/transformations/utils/utils.hpp +++ b/src/common/transformations/include/transformations/utils/utils.hpp @@ -64,18 +64,10 @@ inline bool has_decompression_converts(const std::shared_ptr& f OPENVINO_DEPRECATED("Plugins should use ov::ISyncInferRequest::find_port") inline std::string create_ie_output_name(const Output& output) { - std::string out_name; - OPENVINO_SUPPRESS_DEPRECATED_START - auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(output.get_tensor()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (!tensor_name.empty()) { - out_name = std::move(tensor_name); - } else { - const auto& prev_layer = output.get_node_shared_ptr(); - out_name = prev_layer->get_friendly_name(); - if (prev_layer->get_output_size() != 1) { - out_name += "." + std::to_string(output.get_index()); - } + const auto& prev_layer = output.get_node_shared_ptr(); + auto out_name = prev_layer->get_friendly_name(); + if (prev_layer->get_output_size() != 1) { + out_name += "." 
+ std::to_string(output.get_index()); } return out_name; } diff --git a/src/common/transformations/src/ov_ops/dynamic_quantize.cpp b/src/common/transformations/src/ov_ops/dynamic_quantize.cpp index 74c0498e9a4425..9d1dfa5e5e3f62 100644 --- a/src/common/transformations/src/ov_ops/dynamic_quantize.cpp +++ b/src/common/transformations/src/ov_ops/dynamic_quantize.cpp @@ -7,62 +7,113 @@ #include "openvino/core/partial_shape.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/op/variadic_split.hpp" -#include "variadic_split_shape_inference.hpp" +#include "openvino/util/common_util.hpp" namespace ov { namespace op { namespace internal { -DynamicQuantize::DynamicQuantize(const Output& data, std::vector group_sizes, element::Type dt_scale) - : Op({data}), - m_group_sizes(std::move(group_sizes)), - m_dt_scale(dt_scale) { - OPENVINO_ASSERT(data.get_partial_shape().rank() == m_group_sizes.size(), - "FC input rank should be same as the rank of group_size ", +DynamicQuantize::DynamicQuantize(const Output& data, const Attributes& attrs) : Op({data}), m_attrs(attrs) { + if (m_attrs.scales_zp_output_order.empty()) { + m_attrs.scales_zp_output_order.resize(data.get_partial_shape().size()); + std::iota(m_attrs.scales_zp_output_order.begin(), m_attrs.scales_zp_output_order.end(), 0); + } + + OPENVINO_ASSERT(data.get_partial_shape().rank() == m_attrs.group_sizes.size(), + "DQ input rank should be same as the rank of group_size ", data.get_tensor_ptr()->get_partial_shape().rank(), " / ", - m_group_sizes.size()); - set_output_size(2); + m_attrs.group_sizes.size()); + + OPENVINO_ASSERT(data.get_partial_shape().size() == m_attrs.scales_zp_output_order.size(), + "DQ input rank should be same as the rank of scales and zero points output order)"); + + size_t outputs_number = 2; + if (m_attrs.quantization_type == QuantizationType::Asymmetric && + m_attrs.output_storage_type == OutputStorageType::Planar) + outputs_number = 3; + + OPENVINO_ASSERT( + (m_attrs.output_storage_type 
== OutputStorageType::Planar) || + (m_attrs.quantization_type == QuantizationType::Asymmetric && m_attrs.scale_dt == m_attrs.zp_dt), + "Scales and Zero Points should have the same data type to be stored in the single buffer"); + + set_output_size(outputs_number); validate_and_infer_types(); } void DynamicQuantize::validate_and_infer_types() { std::vector input_shapes = {get_input_partial_shape(0)}; - auto out_shapes = shape_infer(this, input_shapes, m_group_sizes); - set_output_type(0, element::i8, out_shapes[0]); - set_output_type(1, m_dt_scale, out_shapes[1]); + auto out_shapes = shape_infer(this, input_shapes); + set_output_type(0, m_attrs.quantization_dt, out_shapes[0]); + set_output_type(1, m_attrs.scale_dt, out_shapes[1]); + + if (m_attrs.quantization_type == QuantizationType::Asymmetric && + m_attrs.output_storage_type == OutputStorageType::Planar) + set_output_type(2, m_attrs.zp_dt, out_shapes[2]); } std::shared_ptr DynamicQuantize::clone_with_new_inputs(const ov::OutputVector& new_args) const { check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), m_group_sizes, m_dt_scale); + return std::make_shared(new_args.at(0), m_attrs); } std::vector DynamicQuantize::shape_infer(const DynamicQuantize* op, - const std::vector& input_shapes, - const std::vector& group_sizes) { + const std::vector& input_shapes) { std::vector out_shapes; out_shapes.push_back(input_shapes[0]); auto scale_shape = input_shapes[0]; + const auto& group_sizes = op->m_attrs.group_sizes; OPENVINO_ASSERT(scale_shape.size() == group_sizes.size(), "Scale_shape and group_size are supposed to have same rank: ", scale_shape.size(), " / ", group_sizes.size()); for (size_t i = 0; i < scale_shape.size(); i++) { - if (scale_shape[i].is_dynamic()) + if (scale_shape[i].is_dynamic() || scale_shape[i] == 0) continue; - if (group_sizes[i] == UINT64_MAX) + if (group_sizes[i] == UINT64_MAX) { scale_shape[i] = 1; - else { - scale_shape[i] /= group_sizes[i]; // if group_size is larger 
than shape, scale_shape will be 1 - scale_shape[i] = std::max(static_cast(scale_shape[i].get_length()), 1); + } else { + scale_shape[i] = ov::util::ceil_div(scale_shape[i].get_length(), static_cast(group_sizes[i])); } } out_shapes.push_back(scale_shape); + + // Add zero points shape, same as the scales + if (op->m_attrs.quantization_type == QuantizationType::Asymmetric && + op->m_attrs.output_storage_type == OutputStorageType::Planar) + out_shapes.push_back(scale_shape); + + auto transpose_shape = [](const ov::PartialShape& shape, const std::vector& scales_zp_output_order) { + auto transposed_shape = shape; + for (size_t i = 0; i < scales_zp_output_order.size(); i++) { + OPENVINO_ASSERT(scales_zp_output_order[i] < transposed_shape.size()); + transposed_shape[i] = shape[scales_zp_output_order[i]]; + } + + return transposed_shape; + }; + + // Transpose scales and zero points shapes + const auto& scales_zp_output_order = op->m_attrs.scales_zp_output_order; + for (size_t i = 1; i < out_shapes.size(); i++) { + out_shapes[i] = transpose_shape(out_shapes[i], scales_zp_output_order); + } + + if (op->m_attrs.quantization_type == QuantizationType::Asymmetric && + op->m_attrs.output_storage_type != OutputStorageType::Planar) { + // Currently scales and zero points are supposed to be combined over the last dimension only + const auto combine_axis = scales_zp_output_order.empty() ? 
out_shapes[1].size() - 1 + : scales_zp_output_order[out_shapes[1].size() - 1]; + OPENVINO_ASSERT(group_sizes[combine_axis] != 1); + + out_shapes[1][combine_axis] *= 2; // [scale, zero_point] pairs + } + return out_shapes; } diff --git a/src/common/transformations/src/ov_ops/lora_subgraph.cpp b/src/common/transformations/src/ov_ops/lora_subgraph.cpp new file mode 100644 index 00000000000000..8a7a5a75c69c7e --- /dev/null +++ b/src/common/transformations/src/ov_ops/lora_subgraph.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_ops/lora_subgraph.hpp" + +#include "itt.hpp" + +namespace ov { +namespace op { +namespace internal { + +LoraSubgraph::LoraSubgraph(const OutputVector& args, const std::shared_ptr& body) : SubGraphOp(args) { + SubGraphOp::set_function(body); + for (size_t i = 0; i < body->get_parameters().size(); ++i) + m_input_descriptions[0].push_back(std::make_shared(i, i)); + for (size_t i = 0; i < body->get_output_size(); ++i) + m_output_descriptions[0].push_back(std::make_shared(i, i)); + constructor_validate_and_infer_types(); +} + +std::shared_ptr LoraSubgraph::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(internal_LoraSubgraph_clone_with_new_inputs); + check_new_args_count(this, new_args); + return std::make_shared(new_args, get_function()->clone()); +} + +void LoraSubgraph::validate_and_infer_types() { + INTERNAL_OP_SCOPE(internal_LoraSubgraph_validate_and_infer_types); + OPENVINO_ASSERT(get_input_size() == 5, "LoraSubgraph must have 5 inputs whereas it has ", get_input_size()); + OPENVINO_ASSERT(get_output_size() == 1, "LoraSubgraph must have 1 output whereas it has ", get_output_size()); + const auto& body = get_function(); + OPENVINO_ASSERT(body, "LoraSubgraph must have initialized body"); + validate_and_infer_type_body(body, m_input_descriptions[0]); + for (size_t i = 0; i < get_output_size(); ++i) + set_output_type(i, 
body->get_output_element_type(i), body->get_output_partial_shape(i)); +} + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 500d003bd4642e..37ee2d12d9aebb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -94,9 +94,11 @@ #include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp" #include "transformations/op_conversions/convert_scatter_nd_update15_downgrade.hpp" #include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" +#include "transformations/op_conversions/convert_slicescatter.hpp" #include "transformations/op_conversions/convert_softmax_downgrade.hpp" #include "transformations/op_conversions/convert_softmax_upgrade.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" #include "transformations/op_conversions/convert_subtract.hpp" #include "transformations/op_conversions/convert_topk11_downgrade.hpp" #include "transformations/op_conversions/convert_xor_to_logical_xor.hpp" @@ -233,6 +235,8 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr(); ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion) diff --git a/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp index 8840a93e07c7b9..b9bafeeff90ff0 100644 --- a/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp @@ -9,23 +9,29 
@@ #include "itt.hpp" #include "openvino/core/rt_info.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/fake_quantize.hpp" #include "openvino/op/group_conv.hpp" #include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/optional.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" ov::pass::FakeQuantizeReshapeFusion::FakeQuantizeReshapeFusion() { MATCHER_SCOPE(FakeQuantizeReshapeFusion); - const auto fq_node_p = ov::pass::pattern::wrap_type( - {ov::pass::pattern::wrap_type(), // for weights only - pattern::any_input(), - pattern::any_input(), - pattern::any_input(), - pattern::any_input()}, - pattern::consumers_count(1)); + // for weights only + const auto data_p = ov::pass::pattern::wrap_type(pattern::has_static_shape()); + const auto convert_p = ov::pass::pattern::optional(data_p, pattern::consumers_count(1)); + const auto fq_node_p = + ov::pass::pattern::wrap_type({convert_p, + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape())}, + pattern::consumers_count(1)); const auto reshape_node_p = ov::pass::pattern::wrap_type( - {fq_node_p, pattern::any_input()}, + {fq_node_p, ov::pass::pattern::wrap_type()}, [](const Output& output) { // WA: check that all Reshape node consumers are not GroupConvolution operations const auto& target_inputs = output.get_target_inputs(); @@ -36,13 +42,11 @@ ov::pass::FakeQuantizeReshapeFusion::FakeQuantizeReshapeFusion() { ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr(); - if (fq_node->is_dynamic()) - return false; + const auto& fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr(); const auto& reshape_node = 
pattern_map.at(reshape_node_p).get_node_shared_ptr(); const auto& original_data_rank = fq_node->get_input_shape(0).size(); - OutputVector renewed_inputs = { - reshape_node->clone_with_new_inputs({fq_node->input_value(0), reshape_node->input_value(1)})}; + + OutputVector renewed_inputs = {}; for (auto i = 1; i < 5; ++i) { Output limit_input = fq_node->input_value(i); auto limit_shape = limit_input.get_shape(); @@ -62,21 +66,41 @@ ov::pass::FakeQuantizeReshapeFusion::FakeQuantizeReshapeFusion() { }); const auto& new_limit_size = shape_size(new_limit_shape); if (new_limit_size == limit_size) { // we tracked future channel placement - if (new_limit_shape == limit_input.get_shape()) + if (new_limit_shape == limit_input.get_shape()) { renewed_inputs.push_back(limit_input); - else - renewed_inputs.push_back(reshape_node->clone_with_new_inputs( + } else { + auto reshaped_input = reshape_node->clone_with_new_inputs( {limit_input, - ov::op::v0::Constant::create(element::i64, {new_limit_shape.size()}, new_limit_shape)})); + ov::op::v0::Constant::create(element::i64, {new_limit_shape.size()}, new_limit_shape)}); + if (auto constant = ov::util::get_constant_from_source(reshaped_input)) { + reshaped_input = constant; + } + renewed_inputs.push_back(reshaped_input); + } continue; } } // resulting FQ will become or already is more than per-tensor / per-channel return false; } + + auto reshaped_input = + reshape_node->clone_with_new_inputs({pattern_map.at(data_p), reshape_node->input_value(1)}); + if (auto constant = ov::util::get_constant_from_source(reshaped_input)) { + reshaped_input = constant; + } + if (pattern_map.count(convert_p)) { + const auto& convert_node = pattern_map.at(convert_p).get_node_shared_ptr(); + convert_node->input(0).replace_source_output(reshaped_input); + convert_node->validate_and_infer_types(); + reshaped_input = convert_node; + } + renewed_inputs.insert(renewed_inputs.begin(), reshaped_input); + for (auto& new_input : renewed_inputs) 
copy_runtime_info({reshape_node, fq_node}, new_input.get_node_shared_ptr()); const auto new_fq_node = fq_node->clone_with_new_inputs(renewed_inputs); + register_new_node(new_fq_node); replace_node(reshape_node, new_fq_node); new_fq_node->set_friendly_name(reshape_node->get_friendly_name()); copy_runtime_info({fq_node, reshape_node}, new_fq_node); diff --git a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 221484c75cccde..8d075f4a727758 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -22,6 +22,7 @@ #include "openvino/op/parameter.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" +#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" @@ -280,9 +281,16 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto add_1 = ov::pass::pattern::wrap_type({tanh, add_1_constant}); auto mul_2_constant = ov::pass::pattern::wrap_type(); - auto mul_2 = ov::pass::pattern::wrap_type({add_1, mul_2_constant}); - auto mul_3 = ov::pass::pattern::wrap_type({input, mul_2}); + // x * (0.5 * (1 + tanh)) + auto mul_2_1 = ov::pass::pattern::wrap_type({add_1, mul_2_constant}); + auto mul_3_1 = ov::pass::pattern::wrap_type({input, mul_2_1}); + + // (x * 0.5) * (1 + tanh) + auto mul_2_2 = ov::pass::pattern::wrap_type({input, mul_2_constant}); + auto mul_3_2 = ov::pass::pattern::wrap_type({add_1, mul_2_2}); + + auto mul_3 = std::make_shared(OutputVector{mul_3_1, mul_3_2}); ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); @@ -298,7 +306,6 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { 
ov::as_type_ptr(pattern_to_output.at(mul_2_constant).get_node_shared_ptr()); auto add_1_constant_value = ov::as_type_ptr(pattern_to_output.at(add_1_constant).get_node_shared_ptr()); - if (!pow_constant_value || !add_1_constant_value || !mul_0_constant_value || !mul_1_constant_value || !mul_2_constant_value) { return false; @@ -318,18 +325,17 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto gelu = std::make_shared(x_output, op::GeluApproximationMode::TANH); gelu->set_friendly_name(m.get_match_root()->get_friendly_name()); - ov::copy_runtime_info( - { - pattern_to_output.at(pow).get_node_shared_ptr(), - pattern_to_output.at(mul_0).get_node_shared_ptr(), - pattern_to_output.at(mul_1).get_node_shared_ptr(), - pattern_to_output.at(mul_2).get_node_shared_ptr(), - pattern_to_output.at(mul_3).get_node_shared_ptr(), - pattern_to_output.at(tanh).get_node_shared_ptr(), - pattern_to_output.at(add_0).get_node_shared_ptr(), - pattern_to_output.at(add_1).get_node_shared_ptr(), - }, - gelu); + + std::vector> pattern_nodes = + {pow, mul_0, mul_1, tanh, add_0, add_1, mul_2_1, mul_2_2, mul_3_1, mul_3_2}; + std::vector> cp_rt_info_nodes; + for (const auto& pattern_node : pattern_nodes) { + if (pattern_to_output.count(pattern_node)) { + cp_rt_info_nodes.push_back(pattern_to_output.at(pattern_node).get_node_shared_ptr()); + } + } + ov::copy_runtime_info(cp_rt_info_nodes, gelu); + ov::replace_node(m.get_match_root(), gelu); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp new file mode 100644 index 00000000000000..95b754062c13b8 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/lora_subgraph_fusion.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include 
"transformations/common_optimizations/lora_subgraph_fusion.hpp" + +#include +#include + +#include "itt.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/convolution.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/util/read_value_base.hpp" +#include "openvino/pass/pattern/op/optional.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/lora_subgraph.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::LoraSubgraphFusion::LoraSubgraphFusion() { + MATCHER_SCOPE(LoraSubgraphFusion); + using namespace pass::pattern; + auto lora_input_m = any_input(); + auto transpose_const1_m = wrap_type(consumers_count(1)); + auto transpose1_m = optional({lora_input_m, transpose_const1_m}, consumers_count(1)); + + auto read_value1_m = wrap_type(); + auto convert1_m = optional(read_value1_m, consumers_count(1)); + auto matmul1_m = wrap_type({transpose1_m, convert1_m}, consumers_count(1)); + + auto read_value2_m = wrap_type(); + auto convert2_m = optional(read_value2_m, consumers_count(1)); + auto multiply_m = wrap_type({matmul1_m, convert2_m}, consumers_count(1)); + + auto read_value3_m = wrap_type(); + auto convert3_m = optional(read_value3_m, consumers_count(1)); + auto matmul2_m = wrap_type({multiply_m, convert3_m}, consumers_count(1)); + + auto transpose_const2_m = wrap_type(consumers_count(1)); + auto transpose2_m = optional({matmul2_m, transpose_const2_m}, consumers_count(1)); + auto main_flow_m = wrap_type({lora_input_m, any_input()}); + auto add_m = wrap_type({transpose2_m, main_flow_m}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto& lora_input = pattern_map.at(lora_input_m); + const auto& matmul1 = pattern_map.at(matmul1_m); + const auto& state_1 = + 
pattern_map.count(convert1_m) ? pattern_map.at(convert1_m) : pattern_map.at(read_value1_m); + const auto& multiply = pattern_map.at(multiply_m); + const auto& state_2 = + pattern_map.count(convert2_m) ? pattern_map.at(convert2_m) : pattern_map.at(read_value2_m); + const auto& matmul2 = pattern_map.at(matmul2_m); + const auto& state_3 = + pattern_map.count(convert3_m) ? pattern_map.at(convert3_m) : pattern_map.at(read_value3_m); + const auto& main_flow = pattern_map.at(main_flow_m); + const auto& add = pattern_map.at(add_m); + + const auto add_node = add.get_node_shared_ptr(); + if (transformation_callback(add_node)) { + return false; + } + + auto find_connected_input = [](ov::Node* child, ov::Node* parent) { + for (size_t i = 0; i < child->get_input_size(); ++i) { + auto input = child->input(i); + if (input.get_source_output().get_node() == parent) + return input; + } + OPENVINO_THROW("Ops are not connected"); + }; + + // Note: internal_inputs/external_connections order corresponds to LoraSubgraph semantic + const std::vector> internal_inputs{ + // For commutative eltwise ops, input idx may be any, so it must be computed + find_connected_input(add.get_node(), main_flow.get_node()), + pattern_map.count(transpose1_m) ? 
pattern_map.at(transpose1_m).get_node()->input(0) + : matmul1.get_node()->input(0), + matmul1.get_node()->input(1), + find_connected_input(multiply.get_node(), state_2.get_node()), + matmul2.get_node()->input(1), + }; + const ov::OutputVector external_connections{ + main_flow, + lora_input, + state_1, + state_2, + state_3, + }; + + ov::ParameterVector subgraph_parameters; + subgraph_parameters.reserve(internal_inputs.size()); + for (auto& in : internal_inputs) { + auto new_parameter = std::make_shared(in.get_element_type(), in.get_partial_shape()); + subgraph_parameters.push_back(new_parameter); + in.replace_source_output(new_parameter); + } + // Note: lora consumers should be taken before lora_subgraph creation, + // because only original consumers should be replaced with lora's output + const auto& lora_consumers = add.get_target_inputs(); + const auto lora_subgraph = std::make_shared(ov::OutputVector{add}, subgraph_parameters); + const auto lora_node = std::make_shared(external_connections, lora_subgraph); + ov::copy_runtime_info(m.get_matched_nodes(), lora_node); + lora_node->set_friendly_name(add_node->get_friendly_name()); + + for (const auto& consumer : lora_consumers) + consumer.replace_source_output(lora_node->output(0)); + if (!add.get_names().empty()) + lora_node->output(0).set_names(add.get_names()); + return true; + }; + + auto m = std::make_shared(add_m, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp b/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp index 1fdd69711e3af5..0d021c55ca140d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp @@ -14,13 +14,15 @@ #include "openvino/op/fake_quantize.hpp" #include 
"openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/pattern/op/optional.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" ov::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() { MATCHER_SCOPE(PullTransposeThroughFQUp); const auto weights = ov::pass::pattern::wrap_type(); - auto m_fq = pattern::wrap_type({weights, + const auto convert_p = ov::pass::pattern::optional(weights, pattern::consumers_count(1)); + auto m_fq = pattern::wrap_type({convert_p, pattern::any_input(pattern::has_static_shape()), pattern::any_input(pattern::has_static_shape()), pattern::any_input(pattern::has_static_shape()), @@ -33,25 +35,15 @@ ov::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() { auto& pattern_map = m.get_pattern_value_map(); auto transpose = pattern_map[m_transpose].get_node_shared_ptr(); auto fq = pattern_map[m_fq].get_node_shared_ptr(); - - auto are_inputs_scalars = - shape_size(fq->input_value(1).get_shape()) == 1 && shape_size(fq->input_value(2).get_shape()) == 1 && - shape_size(fq->input_value(3).get_shape()) == 1 && shape_size(fq->input_value(4).get_shape()) == 1; - if (!are_inputs_scalars) { - auto perm = ov::as_type_ptr(pattern_map[m_transpose_perm].get_node_shared_ptr()); - if (!perm) - return false; - auto perm_val = perm->cast_vector(); - if (!(perm_val[0] == 0 && perm_val[1] == 1)) - return false; - } - auto input_rank = fq->input(0).get_partial_shape().rank().get_length(); ov::NodeVector new_ops; ov::OutputVector fq_inputs; for (size_t i = 0; i < fq->inputs().size(); ++i) { auto fq_input = fq->input_value(i); + if (i == 0) { + fq_input = pattern_map[weights]; + } auto fq_input_rank = fq_input.get_partial_shape().rank().get_length(); std::vector unsqueeze_axes; for (int64_t j = 0; j < input_rank - fq_input_rank; ++j) { @@ -68,10 +60,17 @@ ov::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() { fq_input = constant; } ov::copy_runtime_info(transpose, 
fq_input.get_node_shared_ptr()); + if (i == 0 && pattern_map.count(convert_p)) { + const auto& convert_node = pattern_map.at(convert_p).get_node_shared_ptr(); + convert_node->input(0).replace_source_output(fq_input); + convert_node->validate_and_infer_types(); + fq_input = convert_node; + } fq_inputs.push_back(fq_input); } auto new_fq = fq->clone_with_new_inputs(fq_inputs); + register_new_node(new_fq); new_ops.push_back(new_fq); new_fq->set_friendly_name(transpose->get_friendly_name()); ov::copy_runtime_info({fq, transpose}, new_ops); diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index f0de55f4028c94..d86b4b71f102c7 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -178,19 +178,21 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st if (std::count(std::begin(to_remove_descriptors_indexes[body_idx]), std::end(to_remove_descriptors_indexes[body_idx]), desc_idx) > 0) { - auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index]; - body_func->remove_parameter(body_param); - // Move all body indexes which are after these indicated by to_remove_descriptors_indexes - update_body_param_desc(body_in_descriptors, - body_in_descriptors[desc_idx]->m_body_parameter_index); + if (body_in_descriptors[desc_idx]->m_body_parameter_index < body_params.size()) { + auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index]; + body_func->remove_parameter(body_param); + // Move all body indexes which are after these indicated by to_remove_descriptors_indexes + update_body_param_desc(body_in_descriptors, + 
body_in_descriptors[desc_idx]->m_body_parameter_index); + } // remove dangling input of MultiSubGraphOp which was not removed earlier auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index; - auto& current_input = op_inputs[current_input_idx]; // the same input tensor can go to different input ports - if (std::count(std::begin(required_inputs_indices), + if (current_input_idx < op_inputs.size() && + std::count(std::begin(required_inputs_indices), std::end(required_inputs_indices), current_input_idx) == 0 && - std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { + std::count(std::begin(op_inputs), std::end(op_inputs), op_inputs[current_input_idx]) > 0) { op_inputs.erase(std::next(op_inputs.begin(), current_input_idx)); // Move all input indexes (in all bodies) which are after these indicated by // to_remove_descriptors_indexes and are not used in any body diff --git a/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp b/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp index 211f351da34024..9a06201f688675 100644 --- a/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/reverse_shape_and_type_infer.cpp @@ -282,6 +282,15 @@ bool ov::pass::ReverseShapeAndTypeInfer::run_on_model(const std::shared_ptrget_input_tensor(0).m_element_type = element::boolean; is_changed = true; } + + // in case TensorFlow models, we can deduce predicate shape that must be a scalar + // If operations created by fusing Switch-Merge sub-graph contain tf_switch_merge_if rt-info + if (if_op->get_rt_info().count("tf_switch_merge_if") && + if_op->get_rt_info()["tf_switch_merge_if"].as() && + if_op->input_value(0).get_partial_shape().rank().is_dynamic()) { + if_op->get_input_tensor(0).m_partial_shape = ov::PartialShape({}); + is_changed = 
true; + } } else if (ov::as_type_ptr(op)) { is_changed |= inherit_output_shape(op, {0}); is_changed |= inherit_output_type(op, {1}); diff --git a/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp index 226093143e68d8..188d0b07684098 100644 --- a/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/shared_ops_optimization.cpp @@ -103,8 +103,11 @@ bool nodes_are_equal(const std::shared_ptr& lhs, const std::shared_ptr& model) { bool rewritten = false; - - for (const auto& op : model->get_ordered_ops()) { + std::unordered_map, size_t> index_map; + const auto& order = model->get_ordered_ops(); + for (size_t i = 0; i < order.size(); ++i) + index_map[order[i]] = i; + for (const auto& op : order) { // Recursively apply transformation for sub-graph based operations if (auto multi_subgraph_op = dynamic_pointer_cast(op)) { for (const auto& sub_graph : multi_subgraph_op->get_functions()) { @@ -124,6 +127,13 @@ bool shared_node_optimization(const shared_ptr& model) { auto& shared_nodes = item.second; if (shared_nodes.size() < 2) continue; + // sort shared_nodes so that root would be the earliest in the topological order + // it is critical for continuous application of this optimization + std::sort(shared_nodes.begin(), + shared_nodes.end(), + [&index_map](const std::shared_ptr& a, const std::shared_ptr& b) { + return index_map[a] < index_map[b]; + }); std::vector visited_nodes(shared_nodes.size(), false); for (size_t i = 0; i < visited_nodes.size(); ++i) { diff --git a/src/common/transformations/src/transformations/control_flow/unroll_if.cpp b/src/common/transformations/src/transformations/control_flow/unroll_if.cpp index 6ef5536495e663..14d28a3a3fcfa3 100644 --- 
a/src/common/transformations/src/transformations/control_flow/unroll_if.cpp +++ b/src/common/transformations/src/transformations/control_flow/unroll_if.cpp @@ -54,14 +54,7 @@ bool ov::pass::UnrollIf::run_on_model(const std::shared_ptr& f) { } for (const auto& output_desc : output_descriptions) { std::shared_ptr result = body->get_results()[output_desc->m_body_value_index]; - const auto& in_value = result->input_value(0); - // set output name to Tensor to store it for openvino to cnn conversion - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name( - in_value.get_tensor(), - op::util::create_ie_output_name(if_node->output(output_desc->m_output_index))); - OPENVINO_SUPPRESS_DEPRECATED_END for (const auto& input : if_node->output(output_desc->m_output_index).get_target_inputs()) { input.replace_source_output(result->get_input_source_output(0)); } diff --git a/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp b/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp index 431527ade998a1..dde1ab35898b67 100644 --- a/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp +++ b/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp @@ -115,21 +115,6 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrget_output_descriptions()) { - // we need to insert tensor_name to the outputs of TensorIterator if they directly connected to - // Results ops. It's necessary to save original TensorIterator name when we use CNNNetwork. 
- auto insert_tensor_name = [&](const ov::Output& ti_output, const ov::Output& insert_to) { - auto target_inputs = ti_output.get_target_inputs(); - if (target_inputs.empty() || - std::any_of(target_inputs.begin(), target_inputs.end(), [](const ov::Input& target_inp) { - return ov::as_type(target_inp.get_node()) != nullptr; - })) { - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(insert_to.get_tensor(), - ov::op::util::create_ie_output_name(ti_output)); - OPENVINO_SUPPRESS_DEPRECATED_END - } - }; - if (const auto& concat_desc = std::dynamic_pointer_cast(desc)) { if (!concat_desc) { @@ -155,9 +140,6 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr(to_concat, concat_desc->m_axis); copy_runtime_info(sub_graph_op, concat); - // set output name to Tensor to store it for openvino to cnn conversion - insert_tensor_name(sub_graph_op->output(concat_desc->m_output_index), concat->output(0)); - // connect the Concat layer to the corresponding TI outputs for (auto& input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(concat); @@ -167,8 +149,6 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr result = body_functions[0]->get_results().at(concat_desc->m_body_value_index); const auto& input_to_res = result->get_input_source_output(0); - // set output name to Tensor to store it for openvino to cnn conversion - insert_tensor_name(sub_graph_op->output(concat_desc->m_output_index), input_to_res); for (auto& input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(input_to_res); @@ -179,11 +159,8 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrm_iteration; iter = iter >= 0 ? 
iter : num_iter - 1; - std::shared_ptr result = - body_functions[iter]->get_results()[output_desc->m_body_value_index]; - const auto& in_value = result->input_value(0); + auto result = body_functions[iter]->get_results()[output_desc->m_body_value_index]; - insert_tensor_name(sub_graph_op->output(output_desc->m_output_index), in_value); for (const auto& input : sub_graph_op->output(output_desc->m_output_index).get_target_inputs()) { input.replace_source_output(result->get_input_source_output(0)); } diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 54fb6a972b7387..d8fd21699a5c20 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -29,7 +29,7 @@ bool fuse_type_to_parameter(const std::shared_ptr& node, bool convert_input_precision); // this function inserts Convert operations to 'data' input and outputs of `node` -// to execute 'node' with the original type. +// to execute 'node' with the original type. This function supports nodes with single output. 
bool wrap_into_original_type(const std::shared_ptr& node, const precisions_map& precisions); bool store_original_type_as_attribute(const std::shared_ptr& node, const precisions_map& precisions); @@ -62,6 +62,8 @@ bool fuse_type_to_ctc_greedy_decoder_seq_len(const std::shared_ptr& no bool fuse_type_to_random_uniform_v8(const std::shared_ptr& node, const precisions_map& precisions); +bool fuse_type_to_search_sorted_v15(const std::shared_ptr& node, const precisions_map& precisions); + bool extend_select_type(const std::shared_ptr& node, const precisions_map& precisions); bool extend_reverse_type(const std::shared_ptr& node, const precisions_map& precisions); @@ -254,7 +256,6 @@ bool convert_function_precision(const std::shared_ptr& f, // Register internal constants only after fixing input type that could lead to nodes // replacement register_constants(ops); - for (auto& node : ops) { // skip precision sensitive nodes if (skip_precision_sensitive && fp16_compression_is_disabled(node) && has_fp16_compression) @@ -331,12 +332,6 @@ bool convert_function_precision(const std::shared_ptr& f, auto& convert_output_tensor = convert->get_output_tensor(0); convert_output_tensor.set_names(result_input.get_names()); - OPENVINO_SUPPRESS_DEPRECATED_START - const auto& legacy_name = ov::descriptor::get_ov_tensor_legacy_name(result_input.get_tensor()); - if (!legacy_name.empty()) { - ov::descriptor::set_ov_tensor_legacy_name(convert_output_tensor, legacy_name); - } - OPENVINO_SUPPRESS_DEPRECATED_END result_input.set_names({}); result->input(0).replace_source_output(convert->output(0)); @@ -469,7 +464,8 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& {ov::op::v13::Multinomial::get_type_info_static(), fuse_type_to_multinomial_v13}, {ov::op::v0::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, {ov::op::v8::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, - {ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}}; + 
{ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}, + {ov::op::v15::SearchSorted::get_type_info_static(), fuse_type_to_search_sorted_v15}}; for (const auto& it : m_additional_type_to_fuse_map) { type_to_fuse[it.first] = it.second; @@ -554,6 +550,18 @@ bool fuse_type_to_unique_v10(const std::shared_ptr& node, const precisions return res; } +bool fuse_type_to_search_sorted_v15(const std::shared_ptr& node, const precisions_map& precisions) { + bool res = false; + if (auto op = ov::as_type_ptr(node)) { + auto it = precisions.find(node->get_output_element_type(0)); + if (it != precisions.end()) { + op->set_output_type_attr(it->second); + res = true; + } + } + return res; +} + bool fuse_type_to_range_v4(const std::shared_ptr& node, const precisions_map& precisions) { auto it = precisions.find(node->get_output_element_type(0)); if (it == precisions.end()) @@ -622,17 +630,20 @@ bool wrap_into_original_type(const std::shared_ptr& node, const precis const auto& to = it->second; const auto& from = it->first; - - auto convert_before = std::make_shared(node->input_value(0), from); - node->input(0).replace_source_output(convert_before); - auto consumers = node->output(0).get_target_inputs(); - auto convert_after = std::make_shared(node, to); - for (auto& input : consumers) { - const auto consumer = input.get_node(); - if (ov::is_type(consumer) || ov::is_type(consumer)) { - continue; + if (node->get_input_size()) { + auto convert_before = std::make_shared(node->input_value(0), from); + node->input(0).replace_source_output(convert_before); + } + if (node->get_output_size() == 1) { + auto consumers = node->output(0).get_target_inputs(); + auto convert_after = std::make_shared(node, to); + for (auto& input : consumers) { + const auto consumer = input.get_node(); + if (ov::is_type(consumer) || ov::is_type(consumer)) { + continue; + } + input.replace_source_output(convert_after); } - input.replace_source_output(convert_after); } return true; @@ -997,12 
+1008,14 @@ bool extend_select_type(const std::shared_ptr& node, const precisions_ type_relaxed->set_origin_input_type(ov::element::boolean, 0); return true; } else if (auto casted = ov::as_type_ptr(node)) { - auto relaxed_op = - std::make_shared>(*casted, - ov::element::TypeVector{ov::element::boolean}, - ov::element::TypeVector{}); - replace_node(node, relaxed_op); - return true; + if (precisions.count(ov::element::boolean) != 0) { + auto relaxed_op = + std::make_shared>(*casted, + ov::element::TypeVector{ov::element::boolean}, + ov::element::TypeVector{}); + replace_node(node, relaxed_op); + return true; + } } return false; } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp index 50a49202742bde..0e0865b3845f48 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp @@ -57,10 +57,6 @@ ov::pass::ConvertMaxPool8ToMaxPool1::ConvertMaxPool8ToMaxPool1() { ov::copy_runtime_info(maxpool_v8_node, maxpool_v1_node); maxpool_v8_node->clear_control_dependencies(); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(maxpool_v1_node->output(0).get_tensor(), out_name); - OPENVINO_SUPPRESS_DEPRECATED_END - return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/convert_slicescatter.cpp b/src/common/transformations/src/transformations/op_conversions/convert_slicescatter.cpp new file mode 100644 index 00000000000000..eedde963461a6b --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_slicescatter.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_slicescatter.hpp" + +#include 
+#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_prod.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_nd_update.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/slice_scatter.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" + +ov::pass::ConvertSliceScatter::ConvertSliceScatter() { + MATCHER_SCOPE(ConvertSliceScatter); + + const auto& slicescatter = pattern::wrap_type(); + + const matcher_pass_callback callback = [this](pattern::Matcher& m) { + const auto& slice_node = ov::as_type_ptr(m.get_match_root()); + if (!slice_node || transformation_callback(slice_node)) { + return false; + } + NodeRegistry node_registry; + const auto& const_0 = node_registry.make(ov::element::i64, Shape{}, 0); + const auto& const_1 = node_registry.make(ov::element::i64, Shape{}, 1); + const auto& const_1d_neg_1 = + node_registry.make(ov::element::i64, Shape{1}, std::vector{-1}); + const auto& const_scatter_indices_shape = + node_registry.make(ov::element::i64, Shape{2}, std::vector{-1, 1}); + const auto& data_shape = node_registry.make(slice_node->input_value(0), ov::element::i64); + const auto& num_elements_data = node_registry.make(data_shape, const_0, false); + const auto& data_indices_flatten = + node_registry.make(const_0, num_elements_data, const_1, ov::element::i64); + const auto& full_data_indices = + node_registry.make(data_indices_flatten, data_shape, false); + std::shared_ptr slice_indices; + if (slice_node->get_input_size() == 5) { + slice_indices = node_registry.make(full_data_indices, + slice_node->input_value(2), + slice_node->input_value(3), + slice_node->input_value(4)); + } else { + slice_indices = node_registry.make(full_data_indices, + slice_node->input_value(2), + slice_node->input_value(3), + slice_node->input_value(4), + slice_node->input_value(5)); + } + const auto& 
slice_indices_flatten = + node_registry.make(slice_indices, const_scatter_indices_shape, false); + const auto& updates_flatten = + node_registry.make(slice_node->input_value(1), const_1d_neg_1, false); + const auto& data_flatten = + node_registry.make(slice_node->input_value(0), const_1d_neg_1, false); + const auto& output_flatten = + node_registry.make(data_flatten, slice_indices_flatten, updates_flatten); + const auto& output = node_registry.make(output_flatten, data_shape, false); + + output->set_friendly_name(slice_node->get_friendly_name()); + copy_runtime_info(slice_node, node_registry.get()); + replace_node(slice_node, output); + + return true; + }; + + const auto& m = std::make_shared(slicescatter, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp new file mode 100644 index 00000000000000..50701d3d6acd56 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertSqueeze15ToSqueeze0::ConvertSqueeze15ToSqueeze0() { + MATCHER_SCOPE(ConvertSqueeze15ToSqueeze0); + + const auto& squeeze_v15_pattern = pattern::wrap_type(); + + const matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) { + const auto& squeeze_v15 = ov::as_type_ptr(m.get_match_root()); + if (!squeeze_v15 || transformation_callback(squeeze_v15)) { + return false; + } + std::shared_ptr squeeze_v0; + if 
(squeeze_v15->get_input_size() == 1) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0)); + } else if (squeeze_v15->get_input_size() == 2 && !squeeze_v15->get_allow_axis_skip()) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0), squeeze_v15->input_value(1)); + } else { + return false; + } + squeeze_v0->set_friendly_name(squeeze_v15->get_friendly_name()); + copy_runtime_info(squeeze_v15, squeeze_v0); + replace_node(squeeze_v15, squeeze_v0); + + return true; + }; + + auto m = std::make_shared(squeeze_v15_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp index c259e9387d9dd0..28e7cd90019b34 100644 --- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp +++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp @@ -383,12 +383,18 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par auto paged_attention = std::make_shared(pa_arguments); + // The output shape of PagedAttention will be converted to [batch, 1, head_num, head_size_v], the head_size_v + // may be different from head_size_q/head_size_k. 
The head_size_v could be got from the shape of value input + auto hidden_dim_v = std::make_shared(std::make_shared(v_target_layout), + v0::Constant::create(element::i64, Shape{}, {-1}), + v0::Constant::create(element::i64, Shape{}, {0})); + auto pa_shape = std::make_shared( OutputVector{ v0::Constant::create(element::i64, Shape{1}, {0}), v0::Constant::create(element::i64, Shape{1}, {1}), v0::Constant::create(element::i64, Shape{1}, {-1}), - std::make_shared(hidden_dim, v0::Constant::create(element::i64, Shape{}, {0})), + std::make_shared(hidden_dim_v, v0::Constant::create(element::i64, Shape{}, {0})), }, 0); auto pa_reshape = std::make_shared(paged_attention->output(0), pa_shape, true); diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp index 27790904f4360b..55f0794e0ee008 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/symbol_optimization.cpp @@ -7,6 +7,7 @@ #include "itt.hpp" #include "openvino/core/bound_evaluation_util.hpp" #include "openvino/core/rt_info.hpp" +#include "openvino/core/tensor_util.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/op/add.hpp" #include "openvino/op/concat.hpp" @@ -354,12 +355,85 @@ void save_shape_sources(const std::shared_ptr& op, STS_map& symbol_sha } } } + +struct OutputValue { + std::vector value; + + bool operator==(const OutputValue& other) const { + return value == other.value; + } + + bool operator<(const OutputValue& other) const { + return std::lexicographical_compare( + std::begin(value), + std::end(value), + std::begin(other.value), + std::end(other.value), + [](const ov::Any& a, const ov::Any& b) { + // each element is either a symbol or an integer. in case they differ any integer is less than a symbol. 
+ if (a.is>() && b.is>()) + return a.as>() < b.as>(); + if (a.is() && b.is()) + return a.as() < b.as(); + return a.is(); + }); + } + + static ov::optional make(const ov::Output& output) { + auto symbols = output.get_tensor().get_value_symbol(); + if (symbols.empty() || symbols.size() == 1) + return {}; + + const auto& lower_value = ov::util::to_vector(output.get_tensor().get_lower_value()); + const auto& upper_value = ov::util::to_vector(output.get_tensor().get_upper_value()); + const auto& et = output.get_element_type(); + bool use_values = lower_value && upper_value && (et == ov::element::i64 || et == ov::element::i32); + + std::vector symbols_as_any(symbols.size(), nullptr); + for (size_t i = 0; i < symbols_as_any.size(); ++i) { + if (use_values && lower_value->at(i) == upper_value->at(i)) + symbols_as_any[i] = lower_value->at(i); + else if (symbols.at(i) != nullptr) + symbols_as_any[i] = ov::symbol::ancestor_of(symbols.at(i)); + else + return {}; + } + return {OutputValue{std::move(symbols_as_any)}}; + } +}; + +void save_and_update_value_sources(const std::shared_ptr& op, + std::map>& multi_symbol_source) { + for (auto& output : op->outputs()) { + if (output.get_tensor().get_value_symbol().size() < 2) + continue; // singular values are handled by optimize_value_usage helper + + if (auto result = OutputValue::make(output)) { + if (multi_symbol_source.count(*result)) { + auto alternative_source = multi_symbol_source[*result]; + if (output.get_element_type() != alternative_source.get_element_type()) { + auto convert = std::make_shared(alternative_source, output.get_element_type()); + ov::copy_runtime_info(output.get_node_shared_ptr(), convert); + alternative_source = convert->output(0); + } + if (output.get_partial_shape().is_dynamic() || + output.get_partial_shape() != alternative_source.get_partial_shape()) + continue; + output.replace(alternative_source); + } else { + multi_symbol_source[*result] = output; + } + } + } +} + } // namespace bool 
ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptr& m) { RUN_ON_FUNCTION_SCOPE(OptimizeSymbolsUsedAsValues); STS_map symbol_shape_source; STS_map symbol_value_source; + std::map> multi_symbol_source; for (const auto& op : topological_order(m)) { // Result has output port which has shared (during validate_and_infer_type) tensor with input port. // Transformations may replace input of Result. After replacement and before Result::validate_and_infer_type -- @@ -375,6 +449,7 @@ bool ov::pass::OptimizeSymbolsUsedAsValues::run_on_model(const std::shared_ptroutputs()) optimize_value_usage(output, symbol_shape_source, symbol_value_source); save_shape_sources(op, symbol_shape_source); + save_and_update_value_sources(op, multi_symbol_source); } return true; } diff --git a/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp b/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp index ba950ad93bf501..ae40ca869c7dcc 100644 --- a/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp +++ b/src/common/transformations/src/transformations/transpose_sinking/ts_utils.cpp @@ -122,12 +122,6 @@ void SwapOutputNames(Output output1, Output output2) { const auto node2_output_names = output2.get_names(); output2.set_names(output1.get_names()); output1.set_names(node2_output_names); - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto node2_legacy_output_names = get_ov_tensor_legacy_name(output2.get_tensor()); - set_ov_tensor_legacy_name(output2.get_tensor(), get_ov_tensor_legacy_name(output1.get_tensor())); - set_ov_tensor_legacy_name(output1.get_tensor(), node2_legacy_output_names); - OPENVINO_SUPPRESS_DEPRECATED_END } void SwapFriendlyNames(const NodePtr& node1, const NodePtr& node2) { diff --git a/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp b/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp index cc4ac2981b6799..940a5b29b8d702 100644 
--- a/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp @@ -13,7 +13,10 @@ #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" #include "openvino/opsets/opset4.hpp" +#include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/manager.hpp" +#include "transformations/common_optimizations/fq_mul_fusion.hpp" +#include "transformations/common_optimizations/pull_transpose_through_fq.hpp" #include "transformations/init_node_info.hpp" using namespace ov; @@ -66,13 +69,8 @@ class FQReshapeFusionTests : public ov::test::TestsCommon, } std::shared_ptr get_reference_function(const FQReshapeFusionTestCase& test_case) { - const auto& data = std::make_shared(element::f32, test_case.data_shape, 0); - const auto& reshaped_data = std::make_shared( - data, - std::make_shared(element::i64, - Shape{test_case.reshape_pattern.size()}, - test_case.reshape_pattern), - true); + auto shape = PartialShape(test_case.reshape_pattern).to_shape(); + const auto& data = std::make_shared(element::f32, shape, 0); const auto& p_il = std::make_shared(element::f32, test_case.il_shape); Output il = p_il; @@ -104,7 +102,7 @@ class FQReshapeFusionTests : public ov::test::TestsCommon, opset4::Constant::create(element::i64, {test_case.new_oh_shape.size()}, test_case.new_oh_shape), true); - auto fq = std::make_shared(reshaped_data, il, ih, ol, oh, 42); + auto fq = std::make_shared(data, il, ih, ol, oh, 42); auto result = std::make_shared(fq); ParameterVector params = {p_il, p_ih, p_ol, p_oh}; @@ -213,3 +211,77 @@ TEST_F(TransformationTestsF, FQReshapeGroupConvolution) { manager.register_pass(); manager.register_pass(); } + +TEST_F(TransformationTestsF, FQOptimizations) { + { + const auto& data = std::make_shared(element::u8, Shape{9, 32}, 0); + const auto& convert = std::make_shared(data, element::f32); + + const auto& il = op::v0::Constant::create(element::f32, 
Shape{1}, {0}); + const auto& ih = op::v0::Constant::create(element::f32, Shape{1}, {254}); + const auto& ol = op::v0::Constant::create(element::f32, Shape{32}, {-14.22}); + const auto& oh = op::v0::Constant::create(element::f32, Shape{32}, {14.22}); + + const auto& fq = std::make_shared(convert, il, ih, ol, oh, 255); + + const auto& reshape = + std::make_shared(fq, + op::v0::Constant::create(element::i64, Shape{4}, {3, 3, 32, 1}), + true); + + const auto& multiply = + std::make_shared(reshape, + op::v0::Constant::create(element::f32, Shape{1, 1, 32, 1}, {0.1140})); + + const auto& transpose = + std::make_shared(multiply, + op::v0::Constant::create(element::i64, Shape{4}, {2, 3, 0, 1})); + + const auto& reshape_to_weight = + std::make_shared(transpose, + op::v0::Constant::create(element::i64, Shape{5}, {32, 1, 1, 3, 3}), + true); + + const auto& input = std::make_shared(element::f32, PartialShape::dynamic(4)); + const auto& group_conv = std::make_shared(input, + reshape_to_weight, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + model = std::make_shared(OutputVector{group_conv}, ParameterVector{input}); + + auto fq_fusions = manager.register_pass(); + fq_fusions->add_matcher(); + fq_fusions->add_matcher(); + fq_fusions->add_matcher(); + fq_fusions->set_name("ov::pass::FakeQuantizeFusions"); + } + { + const auto& data = std::make_shared(element::u8, Shape{32, 1, 3, 3}, 0); + const auto& convert = std::make_shared(data, element::f32); + + const auto& il = op::v0::Constant::create(element::f32, Shape{1, 1, 1, 1}, {0}); + const auto& ih = op::v0::Constant::create(element::f32, Shape{1, 1, 1, 1}, {254}); + const auto& ol = op::v0::Constant::create(element::f32, Shape{32, 1, 1, 1}, {-14.22 * 0.1140}); + const auto& oh = op::v0::Constant::create(element::f32, Shape{32, 1, 1, 1}, {14.22 * 0.1140}); + + const auto& fq = std::make_shared(convert, il, ih, ol, oh, 255); + + const auto& reshape_to_weight = + std::make_shared(fq, + 
op::v0::Constant::create(element::i64, Shape{5}, {32, 1, 1, 3, 3}), + true); + + const auto& input = std::make_shared(element::f32, PartialShape::dynamic(4)); + const auto& group_conv = std::make_shared(input, + reshape_to_weight, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + model_ref = std::make_shared(OutputVector{group_conv}, ParameterVector{input}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp index 837d2ba6d4597e..dbc54f5492bffa 100644 --- a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp @@ -388,6 +388,44 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value) { } } +TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value_2) { + { + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = + std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); + + auto mul_1_constant = + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); + + auto tanh = std::make_shared(mul_1); + + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); + + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(input, mul_2_constant); + + auto mul_3 = std::make_shared(add_1, mul_2); + + model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); + manager.register_pass(); + } + + { + 
auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); + } +} + TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { { auto input = std::make_shared(element::f32, Shape{2, 2}); diff --git a/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp b/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp new file mode 100644 index 00000000000000..2294668f49a6c5 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/lora_subgraph_fusion.cpp @@ -0,0 +1,290 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/lora_subgraph_fusion.hpp" + +#include + +#include + +#include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/opsets/opset15.hpp" +#include "ov_ops/lora_subgraph.hpp" +#include "transformations/utils/utils.hpp" + +using namespace testing; +using namespace ov; + +static constexpr auto netType = ov::element::f32; + +std::pair create_states(const std::vector& shapes, + const ov::element::Type& states_precision = netType) { + ov::OutputVector state_outs; + ov::SinkVector assigns; + size_t idx = 0; + auto create_state = [&](const ov::PartialShape& shape) { + auto variable = std::make_shared( + ov::op::util::VariableInfo{shape, states_precision, std::to_string(idx++)}); + auto read_value = std::make_shared(variable); + auto assign = std::make_shared(read_value, variable); + assigns.push_back(assign); + if (states_precision == netType) + state_outs.push_back(read_value); + else + state_outs.push_back(std::make_shared(read_value, netType)); + }; + for (const auto& shape : shapes) + create_state(shape); + return std::make_pair(state_outs, assigns); +} + 
+std::shared_ptr create_lora_subgraph(const ov::Output& main_flow, + const ov::Output& lora_input, + const ov::OutputVector& states, + bool add_transposes, + size_t mul_read_value_idx = 1, + size_t add_data_flow_idx = 0) { + OPENVINO_ASSERT(states.size() == 3, "get_lora_subgraph expects states size == 3"); + OPENVINO_ASSERT(mul_read_value_idx == 0 || mul_read_value_idx == 1, "mul_read_value_idx must be 0 or 1"); + OPENVINO_ASSERT(add_data_flow_idx == 0 || add_data_flow_idx == 1, "add_data_flow_idx must be 0 or 1"); + + auto create_transpose = [](const ov::Output& input) -> ov::Output { + auto constant = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{4}, {2, 3, 0, 1}); + return std::make_shared(input, constant); + }; + + const auto& mm1_input = add_transposes ? create_transpose(lora_input) : lora_input; + auto mm1 = std::make_shared(mm1_input, states[0], false, true); + + const auto& mul_in_0 = mul_read_value_idx == 0 ? states[1] : mm1->output(0); + const auto& mul_in_1 = mul_read_value_idx == 0 ? mm1->output(0) : states[1]; + auto mul = std::make_shared(mul_in_0, mul_in_1); + + auto mm2 = std::make_shared(mul, states[2], false, true); + + const auto& add_sec_input = add_transposes ? create_transpose(mm2) : mm2; + const auto& add_in_0 = add_data_flow_idx == 0 ? main_flow : add_sec_input; + const auto& add_in_1 = add_data_flow_idx == 0 ? 
add_sec_input : main_flow; + return std::make_shared(add_in_0, add_in_1); +} + +class LoraSubgraphFusionTests : public TransformationTestsF { +public: + LoraSubgraphFusionTests() : TransformationTestsF() { + // TODO: remove when these flags will be enabled in TransformationTestsF (ticket XXX-98039) + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); + } + + void SetUp() override { + TransformationTestsF::SetUp(); + manager.register_pass(); + } +}; + +class LoraSubgraphFusionMatMulTests : public LoraSubgraphFusionTests { +public: + const ov::Dimension K = 563; + const ov::Dimension N = 2048; + ov::PartialShape shape_x = {-1, -1, K}; + ov::PartialShape shape_w = {N, K}; + ov::PartialShape shape_state_1 = {-1, K}; + ov::PartialShape shape_state_2 = {1, -1}; + ov::PartialShape shape_state_3 = {N, -1}; +}; + +TEST_F(LoraSubgraphFusionMatMulTests, StandardPattern) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + auto lora_subgraph = create_lora_subgraph(main_mm, param_lora, states.first, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + model = std::make_shared(OutputVector{lora_subgraph, main_mm}, + states.second, + ParameterVector{param_lora, param_w}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = std::make_shared(netType, shape_state_2); + 
auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_mm = std::make_shared(netType, main_mm->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_mm, inner_param_lora, states_outs, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_mm, inner_param_lora, inner_state_1, inner_state_2, inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + ov::OutputVector lora_inputs{main_mm, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = + std::make_shared(OutputVector{lora, main_mm}, states.second, ParameterVector{param_lora, param_w}); + } +} + +TEST_F(LoraSubgraphFusionMatMulTests, StandardPatternWithConvert) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}, ov::element::f16); + auto lora_subgraph = create_lora_subgraph(main_mm, param_lora, states.first, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + model = std::make_shared(OutputVector{lora_subgraph, main_mm}, + states.second, + ParameterVector{param_lora, param_w}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); 
+ auto inner_state_2 = std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_mm = std::make_shared(netType, main_mm->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_mm, inner_param_lora, states_outs, false); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_mm, inner_param_lora, inner_state_1, inner_state_2, inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}, ov::element::f16); + ov::OutputVector lora_inputs{main_mm, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = + std::make_shared(OutputVector{lora, main_mm}, states.second, ParameterVector{param_lora, param_w}); + } +} + +TEST_F(LoraSubgraphFusionMatMulTests, ReshaffledEltwiseInputs) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + auto lora_subgraph = create_lora_subgraph(main_mm, param_lora, states.first, false, 0, 1); + lora_subgraph->set_friendly_name("lora_subgraph"); + + model = std::make_shared(OutputVector{lora_subgraph, main_mm}, + states.second, + ParameterVector{param_lora, param_w}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + auto main_mm = std::make_shared(param_lora, param_w, false, true); + main_mm->set_friendly_name("main_mm"); + + auto inner_param_lora = std::make_shared(netType, 
shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_mm = std::make_shared(netType, main_mm->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_mm, inner_param_lora, states_outs, false, 0, 1); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_mm, inner_param_lora, inner_state_1, inner_state_2, inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + ov::OutputVector lora_inputs{main_mm, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = + std::make_shared(OutputVector{lora, main_mm}, states.second, ParameterVector{param_lora, param_w}); + } +} + +class LoraSubgraphFusionConvolutionTests : public LoraSubgraphFusionTests { +public: + const ov::Dimension num_channels = 320; + ov::PartialShape shape_x = {-1, num_channels, -1, -1}; + ov::PartialShape shape_state_1 = {-1, num_channels}; + ov::PartialShape shape_state_2 = {1, -1}; + ov::PartialShape shape_state_3 = {num_channels, -1}; +}; + +TEST_F(LoraSubgraphFusionConvolutionTests, StandardPattern) { + { + auto param_lora = std::make_shared(netType, shape_x); + auto main_conv = ov::test::utils::make_convolution(param_lora, + netType, + {1, 1}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::EXPLICIT, + num_channels.get_length()); + main_conv->set_friendly_name("main_conv"); + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + auto lora_subgraph = create_lora_subgraph(main_conv, param_lora, states.first, 
true); + lora_subgraph->set_friendly_name("lora_subgraph"); + model = + std::make_shared(OutputVector{lora_subgraph, main_conv}, states.second, ParameterVector{param_lora}); + } + { + auto param_lora = std::make_shared(netType, shape_x); + auto main_conv = ov::test::utils::make_convolution(param_lora, + netType, + {1, 1}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::EXPLICIT, + num_channels.get_length()); + main_conv->set_friendly_name("main_conv"); + + auto inner_param_lora = std::make_shared(netType, shape_x); + auto inner_state_1 = std::make_shared(netType, shape_state_1); + auto inner_state_2 = std::make_shared(netType, shape_state_2); + auto inner_state_3 = std::make_shared(netType, shape_state_3); + auto inner_param_conv = + std::make_shared(netType, main_conv->get_output_partial_shape(0)); + + ov::OutputVector states_outs{inner_state_1, inner_state_2, inner_state_3}; + auto lora_subgraph = create_lora_subgraph(inner_param_conv, inner_param_lora, states_outs, true); + lora_subgraph->set_friendly_name("lora_subgraph"); + ov::ParameterVector inner_params{inner_param_conv, + inner_param_lora, + inner_state_1, + inner_state_2, + inner_state_3}; + auto inner_model = std::make_shared(OutputVector{lora_subgraph}, inner_params); + + auto states = create_states({shape_state_1, shape_state_2, shape_state_3}); + ov::OutputVector lora_inputs{main_conv, param_lora, states.first[0], states.first[1], states.first[2]}; + auto lora = std::make_shared(lora_inputs, inner_model); + lora->set_friendly_name("lora_subgraph"); + + model_ref = std::make_shared(OutputVector{lora, main_conv}, states.second, ParameterVector{param_lora}); + } +} diff --git a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp index 3f717726b757bc..dcba44313c8c7e 100644 --- a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp +++ 
b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp @@ -6,6 +6,7 @@ #include #include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/ceiling.hpp" #include "openvino/op/concat.hpp" @@ -716,3 +717,57 @@ TEST_F(SharedTransformationTestsF, SharedMaxPool) { model_ref = std::make_shared(OutputVector{concat}, ParameterVector{data}); } } + +TEST_F(SharedTransformationTestsF, TopologicalOrder) { + { + auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); + + auto shape_of = std::make_shared(data); + + auto gather_0 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + + auto gather_1 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + + auto gather_2 = std::make_shared(shape_of, + v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + + auto add_0 = std::make_shared(gather_0, gather_0); + auto add_1 = std::make_shared(gather_1, gather_1); + auto add_2 = std::make_shared(gather_2, gather_2); + + auto concat_0 = + std::make_shared(OutputVector{gather_0, add_0, v0::Constant::create(element::i64, {1}, {0})}, + 0); + auto concat_1 = + std::make_shared(OutputVector{gather_1, add_1, v0::Constant::create(element::i64, {1}, {0})}, + 0); + auto concat_2 = + std::make_shared(OutputVector{gather_2, add_2, v0::Constant::create(element::i64, {1}, {0})}, + 0); + + auto concat = std::make_shared(OutputVector{concat_0, concat_1}, 0); + auto output = std::make_shared(OutputVector{concat, concat_2}, 0); + + model = std::make_shared(OutputVector{output}, ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); + auto shape_of = std::make_shared(data); + auto gather_0 = std::make_shared(shape_of, + 
v0::Constant::create(element::i32, {1}, {0}), + v0::Constant::create(element::i32, {}, {0})); + auto add_0 = std::make_shared(gather_0, gather_0); + auto concat_0 = + std::make_shared(OutputVector{gather_0, add_0, v0::Constant::create(element::i64, {1}, {0})}, + 0); + auto concat = std::make_shared(OutputVector{concat_0, concat_0}, 0); + auto output = std::make_shared(OutputVector{concat, concat_0}, 0); + model_ref = std::make_shared(OutputVector{output}, ParameterVector{data}); + } +} diff --git a/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp b/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp index 9ff2be2e754e95..fb4891d1590cc7 100644 --- a/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp +++ b/src/common/transformations/tests/control_flow/unroll_tensor_iterator_test.cpp @@ -491,14 +491,12 @@ TEST(TransformationTests, UnrollTensorIteratorLSTMCellSingleIterationSingleItera ASSERT_TRUE(res.first) << res.second; } -void collect_legacy_tensor_names(const std::shared_ptr& model, std::vector& holder) { +void collect_tensor_names(const std::shared_ptr& model, + std::vector>& holder) { for (const auto& op : model->get_ordered_ops()) { for (const auto& out : op->outputs()) { - OPENVINO_SUPPRESS_DEPRECATED_START - auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(out.get_tensor()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (!tensor_name.empty() && ov::as_type_ptr(op)) - holder.emplace_back(tensor_name); + if (!out.get_tensor().get_names().empty() && ov::as_type_ptr(op)) + holder.emplace_back(out.get_tensor().get_names()); } } } @@ -538,8 +536,8 @@ TEST(TransformationTests, CheckTensorNamesAfterConvertToTIAndUnrolling) { f = std::make_shared(NodeVector{Y_out, Ho}, ParameterVector{X, Y}); } - std::vector names_before; - collect_legacy_tensor_names(f, names_before); + std::vector> names_before; + collect_tensor_names(f, names_before); pass::Manager m; m.register_pass(); @@ 
-548,8 +546,8 @@ TEST(TransformationTests, CheckTensorNamesAfterConvertToTIAndUnrolling) { m.run_passes(f); OV_ASSERT_NO_THROW(check_rt_info(f)); - std::vector names_after; - collect_legacy_tensor_names(f, names_after); + std::vector> names_after; + collect_tensor_names(f, names_after); EXPECT_EQ(names_before, names_after); } @@ -605,8 +603,8 @@ TEST(TransformationTests, CheckTensorNamesAfterUnrolling) { f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, Y, Z}); } - std::vector names_before; - collect_legacy_tensor_names(f, names_before); + std::vector> names_before; + collect_tensor_names(f, names_before); pass::Manager m; m.register_pass(); @@ -614,11 +612,9 @@ TEST(TransformationTests, CheckTensorNamesAfterUnrolling) { m.run_passes(f); OV_ASSERT_NO_THROW(check_rt_info(f)); - std::vector names_after; - collect_legacy_tensor_names(f, names_after); + std::vector> names_after; + collect_tensor_names(f, names_after); - EXPECT_NE(names_before, names_after); - EXPECT_EQ(names_after.size(), 2); - EXPECT_EQ(names_after[0], "TensorIterator.0"); - EXPECT_EQ(names_after[1], "TensorIterator.1"); + ASSERT_EQ(names_after.size(), 0); + EXPECT_EQ(names_before, names_after); } diff --git a/src/common/transformations/tests/op_conversions/convert_slicescatter_decomposition_test.cpp b/src/common/transformations/tests/op_conversions/convert_slicescatter_decomposition_test.cpp new file mode 100644 index 00000000000000..c3548128403624 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_slicescatter_decomposition_test.cpp @@ -0,0 +1,145 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/opsets/opset15.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/op_conversions/convert_slicescatter.hpp" +#include 
"transformations/utils/utils.hpp" +namespace { +class ConvertSliceScatterTest : public TransformationTestsF, public testing::WithParamInterface { +private: + void SetUp() override { + TransformationTestsF::SetUp(); + const auto& inputs = GetParam(); + manager.register_pass(); + model = create_v15_model(inputs); + model_ref = create_decomposed_model(inputs); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); + } + +protected: + std::shared_ptr create_v15_model(ov::NodeVector inputs) { + const auto& data = inputs.at(0); + const auto& updates = inputs.at(1); + const auto& start = inputs.at(2); + const auto& stop = inputs.at(3); + const auto& step = inputs.at(4); + ov::ParameterVector params{}; + for (const auto& inp : inputs) { + const auto& param = ov::as_type_ptr(inp); + if (param) { + params.push_back(param); + } + } + std::shared_ptr slicescatter; + if (inputs.size() == 5) { + slicescatter = std::make_shared(data, updates, start, stop, step); + } else { + slicescatter = std::make_shared(data, updates, start, stop, step, inputs.at(5)); + } + slicescatter->set_friendly_name("slicescatter15"); + return std::make_shared(slicescatter->outputs(), params); + } + + std::shared_ptr create_decomposed_model(ov::NodeVector inputs) { + const auto& data = inputs.at(0); + const auto& updates = inputs.at(1); + const auto& start = inputs.at(2); + const auto& stop = inputs.at(3); + const auto& step = inputs.at(4); + ov::ParameterVector params{}; + for (const auto& inp : inputs) { + const auto& param = ov::as_type_ptr(inp); + if (param) { + params.push_back(param); + } + } + const auto& const_0 = ov::op::v0::Constant::create(ov::element::i64, {}, {0}); + const auto& const_1 = ov::op::v0::Constant::create(ov::element::i64, {}, {1}); + const auto& const_1d_neg_1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + const auto& 
const_scatter_indices_shape = ov::op::v0::Constant::create(ov::element::i64, {2}, {-1, 1}); + const auto& data_shape = std::make_shared(data, ov::element::i64); + const auto& num_elements_data = std::make_shared(data_shape, const_0, false); + const auto& data_indices_flatten = + std::make_shared(const_0, num_elements_data, const_1, ov::element::i64); + const auto& full_data_indices = std::make_shared(data_indices_flatten, data_shape, false); + std::shared_ptr slice_indices; + if (inputs.size() == 5) { + slice_indices = std::make_shared(full_data_indices, start, stop, step); + } else { + slice_indices = std::make_shared(full_data_indices, start, stop, step, inputs.at(5)); + } + const auto& slice_indices_flatten = + std::make_shared(slice_indices, const_scatter_indices_shape, false); + const auto& updates_flatten = std::make_shared(updates, const_1d_neg_1, false); + const auto& data_flatten = std::make_shared(data, const_1d_neg_1, false); + const auto& output_flatten = + std::make_shared(data_flatten, slice_indices_flatten, updates_flatten); + const auto& slicescatter = std::make_shared(output_flatten, data_shape, false); + slicescatter->set_friendly_name("slicescatter15"); + return std::make_shared(slicescatter->outputs(), params); + } +}; + +INSTANTIATE_TEST_SUITE_P( + ConvertSliceScatterDecomposition, + ConvertSliceScatterTest, + testing::Values( + ov::NodeVector{ + std::make_shared(ov::element::f32, ov::Shape{256, 10, 15}), + std::make_shared(ov::element::f32, ov::Shape{4, 7, 2}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {0, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::f32, ov::Shape{256, 10, 15}), + std::make_shared(ov::element::f32, ov::Shape{4, 7, 2}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + 
ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {0, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, -15, 25}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {9, 7, -3}), + ov::op::v0::Constant::create(ov::element::i32, {3}, {2, 1, -1}), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + }, + ov::NodeVector{ + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + std::make_shared(ov::element::i32, ov::PartialShape::dynamic()), + })); +TEST_P(ConvertSliceScatterTest, CompareFunctions) {} + +} // namespace diff --git a/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp new file mode 100644 index 
00000000000000..f3d90ab2c748bd --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset15.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/utils/utils.hpp" +using namespace ov; +using namespace testing; + +namespace { + +enum class IndicesMode { NONE, CONST, PARAM }; + +std::shared_ptr create_v15_model(const IndicesMode indices_mode, + const std::vector indices_const_val, + const bool allow_axis_skip) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data, allow_axis_skip); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset15::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +std::shared_ptr create_v1_model(const IndicesMode indices_mode, const std::vector indices_const_val) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if 
(indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset1::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, false); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, true); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_const_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::CONST, {0, -4, 3}, false); + model_ref = create_v1_model(IndicesMode::CONST, {0, -4, 3}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + 
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_dynamic_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, false); + model_ref = create_v1_model(IndicesMode::PARAM, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_unsupported_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, true); +} diff --git a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp index 590290fb19d84b..e4653ec084bafb 100644 --- a/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp +++ b/src/common/transformations/tests/symbolic_transformations/symbol_optimization.cpp @@ -9,11 +9,15 @@ #include "common_test_utils/ov_test_utils.hpp" #include "openvino/op/add.hpp" #include "openvino/op/concat.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/gather.hpp" +#include "openvino/op/range.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/visualize_tree.hpp" +#include "transformations/common_optimizations/shared_ops_optimization.hpp" #include "transformations/symbolic_transformations/symbolic_optimizations.hpp" #include "transformations/symbolic_transformations/utils.hpp" @@ -95,3 +99,152 @@ TEST_F(TransformationTestsF, 
ApplySymbolEquivalence_Concat_Values) { model_ref = make_shared(NodeVector{reshape}, ParameterVector{input_2, input_1}); } } + +Output get_dim_by_idx(const Output& source, const int64_t& idx, element::Type type = element::i64) { + auto shape = make_shared(source, type); + auto gather = make_shared(shape, + v0::Constant::create(element::i64, {}, {idx}), + v0::Constant::create(element::i64, {}, {0})); + return gather->output(0); +} + +Output get_dim_by_idx(const Output& source, + initializer_list idx, + element::Type type = element::i64) { + auto shape = make_shared(source, type); + auto gather = make_shared(shape, + v0::Constant::create(element::i64, {idx.size()}, idx), + v0::Constant::create(element::i64, {}, {0})); + return gather->output(0); +} + +TEST_F(TransformationTestsF, ValueOptimizationSingleValue) { + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + + auto dim_0 = get_dim_by_idx(input, {-1}, element::i64); + auto dim_1 = get_dim_by_idx(input, {3}, element::i32); + auto dim_2 = get_dim_by_idx(input, -1, element::i32); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0), + false); + auto range = make_shared(v0::Constant::create(element::i32, {}, {0}), + dim_2, + v0::Constant::create(element::i32, {}, {1}), + element::i32); + + model = make_shared(NodeVector{reshape_0, reshape_1, range}, ParameterVector{input}); + + manager.set_per_pass_validation(false); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + auto dim_1 = get_dim_by_idx(input, {3}, element::i32); + auto dim_0 = std::make_shared(dim_1, element::i64); + auto dim_2 = std::make_shared(dim_1); + auto reshape_0 = make_shared( + input, + 
make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0), + false); + auto range = make_shared(v0::Constant::create(element::i32, {}, {0}), + dim_2, + v0::Constant::create(element::i32, {}, {1}), + element::i32); + + model_ref = make_shared(NodeVector{reshape_0, reshape_1, range}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ValueOptimizationDoubleValue) { + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + + auto dim_0 = get_dim_by_idx(input, {-1, -2}, element::i64); + auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i32); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_1}, 0), + false); + + model = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + + manager.set_per_pass_validation(false); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + { + auto input = make_shared(element::f32, PartialShape::dynamic(4)); + auto dim_0 = get_dim_by_idx(input, {3, 2}, element::i32); + auto dim_1 = std::make_shared(dim_0, element::i64); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_1}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {0}), dim_0}, 0), + false); + + model_ref = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ValueOptimizationSymbolAndValue) { + { + auto input = 
make_shared(element::f32, PartialShape({-1, -1, 4, -1})); + + auto dim_0 = get_dim_by_idx(input, {-1, -2}, element::i64); + auto dim_1 = get_dim_by_idx(input, {3, 2}, element::i32); + + auto reshape_0 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i64, {1}, {-1}), dim_0}, 0), + false); + auto reshape_1 = make_shared( + input, + make_shared(OutputVector{v0::Constant::create(element::i32, {1}, {-1}), dim_1}, 0), + false); + + model = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + + manager.set_per_pass_validation(false); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + { + auto input = make_shared(element::f32, PartialShape({-1, -1, 4, -1})); + auto dim_0 = make_shared( + OutputVector{v0::Constant::create(element::i32, {1}, {-1}), get_dim_by_idx(input, {3, 2}, element::i32)}, + 0); + auto dim_1 = std::make_shared(dim_0, element::i64); + + auto reshape_0 = make_shared(input, dim_1, false); + auto reshape_1 = make_shared(input, dim_0, false); + + model_ref = make_shared(NodeVector{reshape_0, reshape_1}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 03095b28d96a74..318f15ab1a64dc 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset10.hpp" +#include "openvino/opsets/opset15.hpp" #include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset4.hpp" #include "openvino/opsets/opset5.hpp" @@ -963,6 +964,34 @@ TEST(TransformationTests, ConvertPrecision_Select) { ASSERT_TRUE(has_type(f)); } +TEST(TransformationTests, ConvertPrecision_Select_Relaxed) { + std::shared_ptr 
f(nullptr); + { + auto input1 = std::make_shared(element::boolean, Shape{15, 20, 3}); + auto node = std::make_shared(input1); + auto select = std::make_shared(node, input1, input1); + + f = std::make_shared(OutputVector{select}, ParameterVector{input1}); + + // Explicitly setting the element type of a node to a different one to + // test the appearance of TypeRelaxed within Select + node->set_output_type(0, ov::element::u8, node->get_output_partial_shape(0)); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(precisions_map{{element::u8, element::boolean}}); + manager.run_passes(f); + } + OV_ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_TRUE(has_type(f)); + int counter = 0; + for (const auto& node : f->get_ordered_ops()) + if (std::dynamic_pointer_cast(node)) + ++counter; + ASSERT_EQ(counter, 1); +} + TEST(TransformationTests, ConvertPrecision_TypeRelaxedWithSelect) { std::shared_ptr f(nullptr); { @@ -1008,6 +1037,28 @@ TEST(TransformationTests, ConvertPrecision_TypeRelaxed) { } } +TEST(TransformationTests, ConvertPrecision_SearchSorted) { + std::shared_ptr f(nullptr); + { + auto search_sorted_input = opset15::Constant::create(ov::element::i64, {5}, {1, 2, 3, 4, 5}); + auto indices = std::make_shared(ov::element::i64, Shape{3}); + auto search_sorted = std::make_shared(search_sorted_input, indices); + + auto less_input = opset15::Constant::create(ov::element::i64, {3}, {4, 5, 6}); + auto less = std::make_shared(search_sorted, less_input); + + f = std::make_shared(OutputVector{less}, ParameterVector{indices}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(precisions_map{{element::i64, element::i32}}); + manager.run_passes(f); + } + OV_ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_TRUE(has_type(f)); +} + TEST(TransformationTests, ConvertPrecision_Variables) { std::shared_ptr f(nullptr); { @@ -2253,11 +2304,6 @@ TEST(TransformationTests, 
ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp split->get_output_tensor(0).add_names({"split:0"}); split->get_output_tensor(1).add_names({"split:1"}); split->get_output_tensor(2).add_names({"split:2"}); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(0), "legacy_split:0"); - ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(1), "legacy_split:1"); - ov::descriptor::set_ov_tensor_legacy_name(split->get_output_tensor(2), "legacy_split:2"); - OPENVINO_SUPPRESS_DEPRECATED_END model = make_shared(split->outputs(), ParameterVector{param_1}); type_to_fuse_map empty_type_to_fuse_map = {}; @@ -2294,11 +2340,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp ASSERT_EQ("split.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); ASSERT_EQ("split.1", results[1]->get_input_node_ptr(0)->get_friendly_name()); ASSERT_EQ("split.2", results[2]->get_input_node_ptr(0)->get_friendly_name()); - OPENVINO_SUPPRESS_DEPRECATED_START - ASSERT_EQ("legacy_split:0", ov::descriptor::get_ov_tensor_legacy_name(results[0]->get_input_tensor(0))); - ASSERT_EQ("legacy_split:1", ov::descriptor::get_ov_tensor_legacy_name(results[1]->get_input_tensor(0))); - ASSERT_EQ("legacy_split:2", ov::descriptor::get_ov_tensor_legacy_name(results[2]->get_input_tensor(0))); - OPENVINO_SUPPRESS_DEPRECATED_END } TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { diff --git a/src/common/util/include/openvino/util/weights_path.hpp b/src/common/util/include/openvino/util/weights_path.hpp new file mode 100644 index 00000000000000..11215d8eee1304 --- /dev/null +++ b/src/common/util/include/openvino/util/weights_path.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/util/common_util.hpp" + +namespace ov { +namespace util { + +bool validate_weights_path(std::string& weights_path); + +} 
// namespace util +} // namespace ov diff --git a/src/common/util/src/weights_path.cpp b/src/common/util/src/weights_path.cpp new file mode 100644 index 00000000000000..56d0b56d643fa8 --- /dev/null +++ b/src/common/util/src/weights_path.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/util/weights_path.hpp" + +bool ov::util::validate_weights_path(std::string& weights_path) { + if (weights_path.empty() || !ov::util::ends_with(weights_path, ".bin")) { + return false; + } + + return true; +} diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index bc42ffca8a3cf6..5ea4a21b705489 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -49,6 +49,9 @@ target_include_directories(openvino_core_dev INTERFACE $ $) +target_include_directories(openvino_core_dev SYSTEM INTERFACE + $:$>>) + target_link_libraries(openvino_core_dev INTERFACE openvino::itt openvino::util) set_target_properties(openvino_core_dev PROPERTIES EXPORT_NAME core::dev) @@ -81,7 +84,7 @@ if(ENABLE_SYSTEM_PUGIXML) set_target_properties(openvino_core_obj PROPERTIES NO_SYSTEM_FROM_IMPORTED ON) endif() -target_compile_definitions(openvino_core_obj PRIVATE IMPLEMENT_OPENVINO_API) +target_compile_definitions(openvino_core_obj PRIVATE IMPLEMENT_OPENVINO_API XBYAK_NO_OP_NAMES XBYAK64) ov_build_target_faster(openvino_core_obj UNITY diff --git a/src/core/dev_api/openvino/core/descriptor_tensor.hpp b/src/core/dev_api/openvino/core/descriptor_tensor.hpp index 4c7c0ad8c3c972..9418183a1189fc 100644 --- a/src/core/dev_api/openvino/core/descriptor_tensor.hpp +++ b/src/core/dev_api/openvino/core/descriptor_tensor.hpp @@ -14,14 +14,5 @@ void set_element_type(Tensor& tensor, const element::Type& elemenet_type); // To change Tensor type please change the Parameter type. 
OPENVINO_API void set_tensor_type(Tensor& tensor, const element::Type& element_type, const PartialShape& pshape); - -OPENVINO_DEPRECATED("get_ov_tensor_legacy_name() is deprecated. Please don't use this function.") -OPENVINO_API -std::string get_ov_tensor_legacy_name(const Tensor& tensor); - -OPENVINO_DEPRECATED("set_ov_tensor_legacy_name() is deprecated. Please don't use this function.") -OPENVINO_API -void set_ov_tensor_legacy_name(Tensor& tensor, const std::string& tensor_name); - } // namespace descriptor } // namespace ov diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp new file mode 100644 index 00000000000000..fedcb030fb52cf --- /dev/null +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/core_visibility.hpp" +#include "openvino/core/runtime_attribute.hpp" + +namespace ov { + +/** + * @brief Holds weightless caching attributes of a single constant. + * + * WeightlessCacheAttribute class represents runtime info attribute that holds + * the values of original size of the constant in bytes and the binary offset of the + * constant's data in the weights file used by the weightless caching mechanism. It's + * not copyable in case the data was changed (the original node was replaced by a new + * one produced during the tranformation pipeline) - in that case weightless caching + * can't be used for that constant. 
+ */ +class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute { +public: + OPENVINO_RTTI("WeightlessCacheAttribute"); + + WeightlessCacheAttribute() = delete; + + WeightlessCacheAttribute(size_t original_size, size_t bin_offset) + : original_size(original_size), + bin_offset(bin_offset) {} + + bool is_copyable() const override; + + size_t original_size; + size_t bin_offset; +}; + +} // namespace ov diff --git a/src/core/dev_api/openvino/runtime/compute_hash.hpp b/src/core/dev_api/openvino/runtime/compute_hash.hpp new file mode 100644 index 00000000000000..47a90d589be4ee --- /dev/null +++ b/src/core/dev_api/openvino/runtime/compute_hash.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { +namespace runtime { + +/** + * @brief Computes the hash value for the input data + * @param src A pointer to the input data + * @param size The length of the input data in bytes + */ +size_t compute_hash(const void* src, size_t size); + +} // namespace runtime +} // namespace ov diff --git a/src/core/include/openvino/core/descriptor/tensor.hpp b/src/core/include/openvino/core/descriptor/tensor.hpp index edf2e690e8c860..a1cac56fa64e01 100644 --- a/src/core/include/openvino/core/descriptor/tensor.hpp +++ b/src/core/include/openvino/core/descriptor/tensor.hpp @@ -98,14 +98,11 @@ class OPENVINO_API Tensor { PartialShape m_partial_shape; ov::Tensor m_lower_value, m_upper_value; TensorSymbol m_value_symbol; - std::string m_legacy_name; std::unordered_set m_names; std::unordered_set::const_iterator m_name_it; RTMap m_rt_info; - friend OPENVINO_API std::string get_ov_tensor_legacy_name(const Tensor& tensor); - friend OPENVINO_API void set_ov_tensor_legacy_name(Tensor& tensor, const std::string& tensor_name); friend OPENVINO_API void set_element_type(Tensor& tensor, const element::Type& elemenet_type); friend OPENVINO_API void set_tensor_type(Tensor& tensor, const 
element::Type& element_type, diff --git a/src/core/include/openvino/core/node.hpp b/src/core/include/openvino/core/node.hpp index f5a63911abc502..59a4ab29253ded 100644 --- a/src/core/include/openvino/core/node.hpp +++ b/src/core/include/openvino/core/node.hpp @@ -207,6 +207,7 @@ class OPENVINO_API Node : public std::enable_shared_from_this { virtual bool evaluate_upper(ov::TensorVector& output_values) const; virtual bool evaluate_symbol(TensorSymbolVector& output_symbols) const; + virtual bool can_constant_fold(const OutputVector& inputs_values) const; virtual bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values); /// \brief Decomposes the FusedOp into a sub-graph consisting of core openvino ops /// diff --git a/src/core/include/openvino/core/parallel.hpp b/src/core/include/openvino/core/parallel.hpp index 6d3a243c95e7dc..a231c6833f9d84 100644 --- a/src/core/include/openvino/core/parallel.hpp +++ b/src/core/include/openvino/core/parallel.hpp @@ -461,8 +461,10 @@ void parallel_for(const T0& D0, const F& func) { for_1d(ithr, nthr, D0, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func); + { for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func); } #elif OV_THREAD == OV_THREAD_SEQ for_1d(0, 1, D0, func); #endif @@ -509,8 +511,10 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) { for_2d(ithr, nthr, D0, D1, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. 
# pragma omp parallel - for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func); + { for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func); } #elif OV_THREAD == OV_THREAD_SEQ for_2d(0, 1, D0, D1, func); #endif @@ -575,8 +579,10 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) { for_3d(ithr, nthr, D0, D1, D2, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func); + { for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func); } #elif OV_THREAD == OV_THREAD_SEQ for_3d(0, 1, D0, D1, D2, func); #endif @@ -645,8 +651,10 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_4d(ithr, nthr, D0, D1, D2, D3, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func); + { for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func); } #elif OV_THREAD == OV_THREAD_SEQ for_4d(0, 1, D0, D1, D2, D3, func); #endif @@ -703,8 +711,10 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_5d(ithr, nthr, D0, D1, D2, D3, D4, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. 
# pragma omp parallel - for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func); + { for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func); } #elif OV_THREAD == OV_THREAD_SEQ for_5d(0, 1, D0, D1, D2, D3, D4, func); #endif @@ -763,8 +773,10 @@ void parallel_for6d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_6d(ithr, nthr, D0, D1, D2, D3, D4, D5, func); }); #elif OV_THREAD == OV_THREAD_OMP +// Please note that this function does not guarantee execution on the same number of threads from call to call. +// Use the parallel_nt* functions if the procedure depends on a certain number of threads. # pragma omp parallel - for_6d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, D5, func); + { for_6d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, D5, func); } #elif OV_THREAD == OV_THREAD_SEQ for_6d(0, 1, D0, D1, D2, D3, D4, D5, func); #endif diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index e579e8ba83210b..2af2fd272a890d 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -129,9 +129,6 @@ class OPENVINO_API Type { // Return element type in string representation std::string to_string() const; - OPENVINO_DEPRECATED("This constructor is deprecated. 
It will be removed in 2025.0") - Type(size_t bitwidth, bool is_real, bool is_signed, bool is_quantized, const std::string& cname); - private: Type_t m_type{Type_t::undefined}; }; diff --git a/src/core/include/openvino/core/version.hpp b/src/core/include/openvino/core/version.hpp index db1595f7addcb6..79f688795ce37c 100644 --- a/src/core/include/openvino/core/version.hpp +++ b/src/core/include/openvino/core/version.hpp @@ -19,8 +19,8 @@ * @brief Defines OpenVINO patch version */ -#define OPENVINO_VERSION_MAJOR 2024 -#define OPENVINO_VERSION_MINOR 5 +#define OPENVINO_VERSION_MAJOR 2025 +#define OPENVINO_VERSION_MINOR 0 #define OPENVINO_VERSION_PATCH 0 namespace ov { diff --git a/src/core/include/openvino/op/assign.hpp b/src/core/include/openvino/op/assign.hpp index c3f8492e54b4f8..895f6619778951 100644 --- a/src/core/include/openvino/op/assign.hpp +++ b/src/core/include/openvino/op/assign.hpp @@ -67,7 +67,7 @@ class OPENVINO_API Assign : public util::AssignBase { const TensorVector& inputs, const EvaluationContext& evaluation_context) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v6 } // namespace op diff --git a/src/core/include/openvino/op/concat.hpp b/src/core/include/openvino/op/concat.hpp index 0e8fa67c54dfae..d32171c9facbc1 100644 --- a/src/core/include/openvino/op/concat.hpp +++ b/src/core/include/openvino/op/concat.hpp @@ -36,17 +36,6 @@ class OPENVINO_API Concat : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - /// \return The concatenation axis. - OPENVINO_DEPRECATED("The function get_concatenation_axis() is deprecated. Will be removed in 2025.0 release. 
Use " - "get_axis() instead.") - int64_t get_concatenation_axis() const { - return m_concat_axis; - } - OPENVINO_DEPRECATED("The function set_concatenation_axis() is deprecated. Will be removed in 2025.0 release. Use " - "set_axis() instead.") - void set_concatenation_axis(int64_t concatenation_axis) { - m_concat_axis = concatenation_axis; - } /// \return The concatenation axis. int64_t get_axis() const { return m_axis; diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 62b70a883fc1a5..ccaae01586d612 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -215,7 +215,7 @@ class OPENVINO_API Constant : public Op { bool evaluate_upper(TensorVector& outputs) const override; // Don't constant fold a constant; it would make a copy - bool constant_fold(OutputVector& outputs, const OutputVector& inputs) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; /// \brief Returns the value of the constant node as a Shape object /// Can only be used on element::i64 nodes and interprets diff --git a/src/core/include/openvino/op/convert_like.hpp b/src/core/include/openvino/op/convert_like.hpp index 244d0f4c7d70b4..0d7f73075e21b9 100644 --- a/src/core/include/openvino/op/convert_like.hpp +++ b/src/core/include/openvino/op/convert_like.hpp @@ -27,6 +27,7 @@ class OPENVINO_API ConvertLike : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v1 } // namespace op diff --git a/src/core/include/openvino/op/fake_quantize.hpp b/src/core/include/openvino/op/fake_quantize.hpp index b47c7016c8709e..52caca885a02cc 100644 --- a/src/core/include/openvino/op/fake_quantize.hpp +++ b/src/core/include/openvino/op/fake_quantize.hpp @@ 
-69,7 +69,8 @@ class OPENVINO_API FakeQuantize : public Op { bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override { + + bool can_constant_fold(const OutputVector& inputs_values) const override { return false; } diff --git a/src/core/include/openvino/op/identity.hpp b/src/core/include/openvino/op/identity.hpp new file mode 100644 index 00000000000000..a884ba259f0002 --- /dev/null +++ b/src/core/include/openvino/op/identity.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace v16 { +/// \brief Identity operation is used as a placeholder op. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API Identity : public Op { +public: + OPENVINO_OP("Identity", "opset16"); + Identity() = default; + /** + * @brief Identity operation is used as a placeholder. It copies the tensor data to the output. + */ + Identity(const Output& data); + + bool visit_attributes(AttributeVisitor& visitor) override; + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; +}; +} // namespace v16 +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/op/lstm_sequence.hpp b/src/core/include/openvino/op/lstm_sequence.hpp index 4d68b5ceb7f9c6..28f5a2b1b8bfaf 100644 --- a/src/core/include/openvino/op/lstm_sequence.hpp +++ b/src/core/include/openvino/op/lstm_sequence.hpp @@ -17,103 +17,6 @@ namespace ov { namespace op { -namespace v0 { - -/// -/// \brief Class for lstm sequence node. 
-/// -/// \note It follows notation and equations defined as in ONNX standard: -/// https://github.com/onnx/onnx/blob/master/docs/Operators.md#LSTM -/// -/// \sa LSTMCell, RNNCell, GRUCell -/// -/// -/// \ingroup ov_ops_cpp_api -class OPENVINO_API OPENVINO_DEPRECATED( - "The class ov::op::v0::LSTMSequence is deprecated. It will be removed in 2025.0 release. Use " - "ov::op::v5::LSTMSequence instead.") LSTMSequence : public util::RNNCellBase { -public: - OPENVINO_OP("LSTMSequence", "opset1", util::RNNCellBase); - LSTMSequence() = default; - - using direction = RecurrentSequenceDirection; - - size_t get_default_output_index() const override { - return no_default_index(); - } - explicit LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const Output& P, - const std::int64_t hidden_size, - const direction lstm_direction, - LSTMWeightsFormat weights_format = LSTMWeightsFormat::IFCO, - const std::vector activations_alpha = {}, - const std::vector activations_beta = {}, - const std::vector activations = {"sigmoid", "tanh", "tanh"}, - const float clip_threshold = 0, - const bool input_forget = false); - - explicit LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const std::int64_t hidden_size, - const direction lstm_direction, - LSTMWeightsFormat weights_format = LSTMWeightsFormat::IFCO, - const std::vector& activations_alpha = {}, - const std::vector& activations_beta = {}, - const std::vector& activations = {"sigmoid", "tanh", "tanh"}, - const float clip_threshold = 0, - const bool input_forget = false); - - void validate_and_infer_types() override; - bool visit_attributes(AttributeVisitor& visitor) override; - - std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) 
const override; - - std::vector get_activations_alpha() const { - return m_activations_alpha; - } - std::vector get_activations_beta() const { - return m_activations_beta; - } - std::vector get_activations() const { - return m_activations; - } - float get_clip_threshold() const { - return m_clip; - } - direction get_direction() const { - return m_direction; - } - void set_direction(const direction& dir) { - m_direction = dir; - } - std::int64_t get_hidden_size() const { - return m_hidden_size; - } - bool get_input_forget() const { - return m_input_forget; - } - LSTMWeightsFormat get_weights_format() const { - return m_weights_format; - } - -private: - direction m_direction; - bool m_input_forget; - LSTMWeightsFormat m_weights_format; -}; -} // namespace v0 - namespace v5 { /// /// \brief Class for lstm sequence node. diff --git a/src/core/include/openvino/op/ops.hpp b/src/core/include/openvino/op/ops.hpp index 135167fe11ed5f..73510a524ef3e1 100644 --- a/src/core/include/openvino/op/ops.hpp +++ b/src/core/include/openvino/op/ops.hpp @@ -90,6 +90,7 @@ #include "openvino/op/hswish.hpp" #include "openvino/op/i420_to_bgr.hpp" #include "openvino/op/i420_to_rgb.hpp" +#include "openvino/op/identity.hpp" #include "openvino/op/idft.hpp" #include "openvino/op/if.hpp" #include "openvino/op/interpolate.hpp" diff --git a/src/core/include/openvino/op/random_uniform.hpp b/src/core/include/openvino/op/random_uniform.hpp index 6a4de83715e30a..22f06f79402135 100644 --- a/src/core/include/openvino/op/random_uniform.hpp +++ b/src/core/include/openvino/op/random_uniform.hpp @@ -42,7 +42,7 @@ class OPENVINO_API RandomUniform : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; /// \return Turns off constant folding for RandomUniform operation. 
- bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; /// \return The output tensor type. const ov::element::Type& get_out_type() const; diff --git a/src/core/include/openvino/op/read_value.hpp b/src/core/include/openvino/op/read_value.hpp index 27447644037211..e37d6baa11c01c 100644 --- a/src/core/include/openvino/op/read_value.hpp +++ b/src/core/include/openvino/op/read_value.hpp @@ -80,7 +80,7 @@ class OPENVINO_API ReadValue : public util::ReadValueBase { const EvaluationContext& evaluation_context) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v6 } // namespace op diff --git a/src/core/include/openvino/op/reshape.hpp b/src/core/include/openvino/op/reshape.hpp index f3a9e7aa8e59c1..48bc08f8c3d947 100644 --- a/src/core/include/openvino/op/reshape.hpp +++ b/src/core/include/openvino/op/reshape.hpp @@ -52,6 +52,7 @@ class OPENVINO_API Reshape : public Op { bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; protected: bool m_special_zero; diff --git a/src/core/include/openvino/op/result.hpp b/src/core/include/openvino/op/result.hpp index dc8162a10b6627..00e805d1f2aeb5 100644 --- a/src/core/include/openvino/op/result.hpp +++ b/src/core/include/openvino/op/result.hpp @@ -30,7 +30,7 @@ class OPENVINO_API Result : public Op { bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& 
inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; /// \brief Returns current layout, or empty Layout if it is not set Layout get_layout() const; diff --git a/src/core/include/openvino/op/search_sorted.hpp b/src/core/include/openvino/op/search_sorted.hpp index c370ba46b2f182..efb1f8491e0882 100644 --- a/src/core/include/openvino/op/search_sorted.hpp +++ b/src/core/include/openvino/op/search_sorted.hpp @@ -22,7 +22,15 @@ class OPENVINO_API SearchSorted : public Op { /// \param values Values to search indexs for. /// \param right_mode If False, return the first suitable index that is found for given value. If True, return /// the last such index. - SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode = false); + /// \param output_type The element type of the output tensor. This is purely an implementation flag, which + /// is used to convert the output type for CPU plugin in ConvertPrecision transformation (and potentially other + /// plugins as well). Setting this flag to element::i32 will result in the output tensor of i32 element type. + /// Setting this flag to element::i64 will generally not give any effect, since it will be converted to i32 anyway, + /// at least for CPU plugin. 
+ SearchSorted(const Output& sorted_sequence, + const Output& values, + bool right_mode = false, + const element::Type& output_type = element::i64); void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; @@ -36,8 +44,17 @@ class OPENVINO_API SearchSorted : public Op { m_right_mode = right_mode; } + void set_output_type_attr(const element::Type& output_type) { + m_output_type = output_type; + } + + element::Type get_output_type_attr() const { + return m_output_type; + } + private: bool m_right_mode{}; + element::Type m_output_type = element::i64; }; } // namespace v15 } // namespace op diff --git a/src/core/include/openvino/op/shape_of.hpp b/src/core/include/openvino/op/shape_of.hpp index c8245d91069ed0..375d087f7e6cf8 100644 --- a/src/core/include/openvino/op/shape_of.hpp +++ b/src/core/include/openvino/op/shape_of.hpp @@ -38,6 +38,7 @@ class OPENVINO_API ShapeOf : public util::ShapeOfBase { bool evaluate_upper(TensorVector& output_values) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; private: element::Type m_output_type; @@ -64,6 +65,7 @@ class OPENVINO_API ShapeOf : public util::ShapeOfBase { bool evaluate_upper(TensorVector& output_values) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v0 } // namespace op diff --git a/src/core/include/openvino/op/squeeze.hpp b/src/core/include/openvino/op/squeeze.hpp index f7cb41f974db2f..dde456aa2eef47 100644 --- a/src/core/include/openvino/op/squeeze.hpp +++ b/src/core/include/openvino/op/squeeze.hpp @@ -4,7 +4,7 @@ #pragma once -#include 
"openvino/op/op.hpp" +#include "openvino/op/util/squeeze_base.hpp" namespace ov { namespace op { @@ -12,29 +12,65 @@ namespace v0 { /// \brief Squeeze operation. /// /// \ingroup ov_ops_cpp_api -class OPENVINO_API Squeeze : public Op { +class OPENVINO_API Squeeze : public util::SqueezeBase { public: OPENVINO_OP("Squeeze", "opset1"); Squeeze(); - Squeeze(const Output& data, const Output& axes); + /// \brief Constructs a squeeze v0 operation. + /// + /// \param data Input tensor with data Squeeze(const Output& data); + /// \brief Constructs a squeeze v0 operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. + Squeeze(const Output& data, const Output& axes); void validate_and_infer_types() override; bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; - bool has_evaluate() const override; - bool evaluate_lower(TensorVector& outputs) const override; - bool evaluate_upper(TensorVector& outputs) const override; - bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - bool is_dynamic() const override; - private: Output get_default_axes_input() const; }; } // namespace v0 + +namespace v15 { +/// \brief Squeeze operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API Squeeze : public util::SqueezeBase { +public: + OPENVINO_OP("Squeeze", "opset15"); + + Squeeze(); + /// \brief Constructs a squeeze v15 operation. + /// + /// \param data Input tensor with data + /// \param allow_axis_skip Shape inference result dynamic rank if selected axis has 1 in range of its dynamic + Squeeze(const Output& data, const bool allow_axis_skip = false); + /// \brief Constructs a squeeze v15 operation. 
+ /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. + /// \param allow_axis_skip Shape inference result dynamic rank if selected axis has 1 in range of its dynamic + Squeeze(const Output& data, const Output& axes, const bool allow_axis_skip = false); + + void validate_and_infer_types() override; + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + bool visit_attributes(AttributeVisitor& visitor) override; + + bool get_allow_axis_skip() const; + +private: + Output get_default_axes_input() const; + bool m_allow_axis_skip{}; +}; +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/include/openvino/op/stft.hpp b/src/core/include/openvino/op/stft.hpp index 0a8ab80e7a48fe..0b0d8ae9a9e473 100644 --- a/src/core/include/openvino/op/stft.hpp +++ b/src/core/include/openvino/op/stft.hpp @@ -13,7 +13,7 @@ namespace v15 { /// \ingroup ov_ops_cpp_api class OPENVINO_API STFT : public Op { public: - OPENVINO_OP("STFT"); + OPENVINO_OP("STFT", "opset15"); STFT() = default; /// \brief Constructs a STFT operation. 
diff --git a/src/core/include/openvino/op/strided_slice.hpp b/src/core/include/openvino/op/strided_slice.hpp index 2ba4f84c0936bf..aa080bc6563b90 100644 --- a/src/core/include/openvino/op/strided_slice.hpp +++ b/src/core/include/openvino/op/strided_slice.hpp @@ -114,6 +114,7 @@ class OPENVINO_API StridedSlice : public Op { bool evaluate_upper(TensorVector& outputs) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; private: AxisSet convert_mask_to_axis_set(const std::vector& mask) const; diff --git a/src/core/include/openvino/op/unsqueeze.hpp b/src/core/include/openvino/op/unsqueeze.hpp index d9839c7d68d719..4701df2dd4d4ec 100644 --- a/src/core/include/openvino/op/unsqueeze.hpp +++ b/src/core/include/openvino/op/unsqueeze.hpp @@ -30,6 +30,7 @@ class OPENVINO_API Unsqueeze : public Op { bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; }; diff --git a/src/core/include/openvino/op/util/broadcast_base.hpp b/src/core/include/openvino/op/util/broadcast_base.hpp index 2e500eb611c04c..6300559ac8cf00 100644 --- a/src/core/include/openvino/op/util/broadcast_base.hpp +++ b/src/core/include/openvino/op/util/broadcast_base.hpp @@ -63,6 +63,7 @@ class OPENVINO_API BroadcastBase : public Op { bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; + bool evaluate_symbol(ov::TensorSymbolVector& output_symbols) const override; PartialShape get_result_shape_pdpd(const PartialShape& arg0_shape, const PartialShape& target_shape, diff --git 
a/src/core/include/openvino/op/util/gather_base.hpp b/src/core/include/openvino/op/util/gather_base.hpp index f7846b83cfe465..9fa8387aee6b3a 100644 --- a/src/core/include/openvino/op/util/gather_base.hpp +++ b/src/core/include/openvino/op/util/gather_base.hpp @@ -34,6 +34,7 @@ class OPENVINO_API GatherBase : public Op { bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; const int64_t& get_batch_dims() const; void set_batch_dims(int64_t batch_dims); diff --git a/src/core/include/openvino/op/util/squeeze_base.hpp b/src/core/include/openvino/op/util/squeeze_base.hpp new file mode 100644 index 00000000000000..50d960824e10d2 --- /dev/null +++ b/src/core/include/openvino/op/util/squeeze_base.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace util { +/// \brief Squeeze operation. +/// +/// \ingroup ov_ops_cpp_api +class OPENVINO_API SqueezeBase : public Op { +public: + OPENVINO_OP("Squeeze", "util"); + SqueezeBase() = default; + /// \brief Constructs a squeeze operation. + /// + /// \param data Input tensor with data + SqueezeBase(const Output& data); + /// \brief Constructs a squeeze operation. + /// + /// \param data Input tensor with data + /// \param axis The axis along which to squeeze the input tensor. 
+ SqueezeBase(const Output& data, const Output& axes); + + bool has_evaluate() const override; + bool evaluate_lower(TensorVector& outputs) const override; + bool evaluate_upper(TensorVector& outputs) const override; + bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; + bool can_constant_fold(const OutputVector& inputs_values) const override; + bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool is_dynamic() const override; +}; +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/opsets/opset.hpp b/src/core/include/openvino/opsets/opset.hpp index 7ab8c43fcec6ab..a05d5dd38d6808 100644 --- a/src/core/include/openvino/opsets/opset.hpp +++ b/src/core/include/openvino/opsets/opset.hpp @@ -168,10 +168,15 @@ const OPENVINO_API OpSet& get_opset13(); */ const OPENVINO_API OpSet& get_opset14(); /** - * @brief Returns map of available opsets + * @brief Returns opset15 * @ingroup ov_opset_cpp_api */ const OPENVINO_API OpSet& get_opset15(); +/** + * @brief Returns opset16 + * @ingroup ov_opset_cpp_api + */ +const OPENVINO_API OpSet& get_opset16(); /** * @brief Returns map of available opsets * @ingroup ov_opset_cpp_api diff --git a/src/core/include/openvino/opsets/opset1.hpp b/src/core/include/openvino/opsets/opset1.hpp index 80ea1b517e4080..e2a7a159299ad3 100644 --- a/src/core/include/openvino/opsets/opset1.hpp +++ b/src/core/include/openvino/opsets/opset1.hpp @@ -9,9 +9,7 @@ namespace ov { namespace opset1 { #define _OPENVINO_OP_REG(a, b) using b::a; -OPENVINO_SUPPRESS_DEPRECATED_START #include "openvino/opsets/opset1_tbl.hpp" -OPENVINO_SUPPRESS_DEPRECATED_END #undef _OPENVINO_OP_REG } // namespace opset1 } // namespace ov diff --git a/src/core/include/openvino/opsets/opset15_tbl.hpp b/src/core/include/openvino/opsets/opset15_tbl.hpp index a18093c4ef3f5c..8d12420719bb6b 100644 --- a/src/core/include/openvino/opsets/opset15_tbl.hpp +++ 
b/src/core/include/openvino/opsets/opset15_tbl.hpp @@ -7,11 +7,220 @@ # define _OPENVINO_OP_REG(x, y) #endif -// Previous opsets operators -_OPENVINO_OP_REG(Parameter, ov::op::v0) +_OPENVINO_OP_REG(Abs, ov::op::v0) +_OPENVINO_OP_REG(Acos, ov::op::v0) +_OPENVINO_OP_REG(Add, ov::op::v1) +_OPENVINO_OP_REG(Asin, ov::op::v0) +_OPENVINO_OP_REG(Atan, ov::op::v0) +_OPENVINO_OP_REG(AvgPool, ov::op::v14) +_OPENVINO_OP_REG(BatchNormInference, ov::op::v5) +_OPENVINO_OP_REG(BinaryConvolution, ov::op::v1) +_OPENVINO_OP_REG(Broadcast, ov::op::v3) +_OPENVINO_OP_REG(Bucketize, ov::op::v3) +_OPENVINO_OP_REG(CTCGreedyDecoder, ov::op::v0) +_OPENVINO_OP_REG(Ceiling, ov::op::v0) +_OPENVINO_OP_REG(Clamp, ov::op::v0) +_OPENVINO_OP_REG(Concat, ov::op::v0) +_OPENVINO_OP_REG(Constant, ov::op::v0) _OPENVINO_OP_REG(Convert, ov::op::v0) +_OPENVINO_OP_REG(ConvertLike, ov::op::v1) +_OPENVINO_OP_REG(Convolution, ov::op::v1) +_OPENVINO_OP_REG(ConvolutionBackpropData, ov::op::v1) +_OPENVINO_OP_REG(Cos, ov::op::v0) +_OPENVINO_OP_REG(Cosh, ov::op::v0) +_OPENVINO_OP_REG(CumSum, ov::op::v0) +_OPENVINO_OP_REG(DeformablePSROIPooling, ov::op::v1) +_OPENVINO_OP_REG(DepthToSpace, ov::op::v0) +_OPENVINO_OP_REG(Divide, ov::op::v1) +_OPENVINO_OP_REG(Elu, ov::op::v0) +_OPENVINO_OP_REG(Erf, ov::op::v0) +_OPENVINO_OP_REG(Equal, ov::op::v1) +_OPENVINO_OP_REG(Exp, ov::op::v0) +_OPENVINO_OP_REG(ExtractImagePatches, ov::op::v3) +_OPENVINO_OP_REG(FakeQuantize, ov::op::v0) +_OPENVINO_OP_REG(Floor, ov::op::v0) +_OPENVINO_OP_REG(FloorMod, ov::op::v1) +_OPENVINO_OP_REG(GatherTree, ov::op::v1) +_OPENVINO_OP_REG(Greater, ov::op::v1) +_OPENVINO_OP_REG(GreaterEqual, ov::op::v1) +_OPENVINO_OP_REG(GridSample, ov::op::v9) +_OPENVINO_OP_REG(GroupConvolution, ov::op::v1) +_OPENVINO_OP_REG(GroupConvolutionBackpropData, ov::op::v1) +_OPENVINO_OP_REG(GRN, ov::op::v0) +_OPENVINO_OP_REG(HardSigmoid, ov::op::v0) +_OPENVINO_OP_REG(Less, ov::op::v1) +_OPENVINO_OP_REG(LessEqual, ov::op::v1) +_OPENVINO_OP_REG(Log, ov::op::v0) 
+_OPENVINO_OP_REG(LogicalAnd, ov::op::v1) +_OPENVINO_OP_REG(LogicalNot, ov::op::v1) +_OPENVINO_OP_REG(LogicalOr, ov::op::v1) +_OPENVINO_OP_REG(LogicalXor, ov::op::v1) +_OPENVINO_OP_REG(LRN, ov::op::v0) +_OPENVINO_OP_REG(LSTMCell, ov::op::v4) +_OPENVINO_OP_REG(MatMul, ov::op::v0) +_OPENVINO_OP_REG(Maximum, ov::op::v1) +_OPENVINO_OP_REG(Minimum, ov::op::v1) +_OPENVINO_OP_REG(Mod, ov::op::v1) +_OPENVINO_OP_REG(Multiply, ov::op::v1) +_OPENVINO_OP_REG(Negative, ov::op::v0) +_OPENVINO_OP_REG(NormalizeL2, ov::op::v0) +_OPENVINO_OP_REG(NotEqual, ov::op::v1) +_OPENVINO_OP_REG(OneHot, ov::op::v1) +_OPENVINO_OP_REG(PRelu, ov::op::v0) +_OPENVINO_OP_REG(PSROIPooling, ov::op::v0) +_OPENVINO_OP_REG(Parameter, ov::op::v0) +_OPENVINO_OP_REG(Power, ov::op::v1) +_OPENVINO_OP_REG(PriorBoxClustered, ov::op::v0) +_OPENVINO_OP_REG(Proposal, ov::op::v4) +_OPENVINO_OP_REG(Range, ov::op::v4) +_OPENVINO_OP_REG(Relu, ov::op::v0) +_OPENVINO_OP_REG(ReduceMax, ov::op::v1) +_OPENVINO_OP_REG(ReduceLogicalAnd, ov::op::v1) +_OPENVINO_OP_REG(ReduceLogicalOr, ov::op::v1) +_OPENVINO_OP_REG(ReduceMean, ov::op::v1) +_OPENVINO_OP_REG(ReduceMin, ov::op::v1) +_OPENVINO_OP_REG(ReduceProd, ov::op::v1) +_OPENVINO_OP_REG(ReduceSum, ov::op::v1) +_OPENVINO_OP_REG(RegionYolo, ov::op::v0) +_OPENVINO_OP_REG(ReorgYolo, ov::op::v0) +_OPENVINO_OP_REG(Reshape, ov::op::v1) +_OPENVINO_OP_REG(Result, ov::op::v0) +_OPENVINO_OP_REG(ReverseSequence, ov::op::v0) +_OPENVINO_OP_REG(ROIPooling, ov::op::v0) +_OPENVINO_OP_REG(Select, ov::op::v1) +_OPENVINO_OP_REG(Selu, ov::op::v0) +_OPENVINO_OP_REG(Sign, ov::op::v0) +_OPENVINO_OP_REG(Sigmoid, ov::op::v0) +_OPENVINO_OP_REG(Sin, ov::op::v0) +_OPENVINO_OP_REG(Sinh, ov::op::v0) +_OPENVINO_OP_REG(Sqrt, ov::op::v0) +_OPENVINO_OP_REG(SpaceToDepth, ov::op::v0) +_OPENVINO_OP_REG(Split, ov::op::v1) +_OPENVINO_OP_REG(SquaredDifference, ov::op::v0) +_OPENVINO_OP_REG(Squeeze, ov::op::v15) +_OPENVINO_OP_REG(StridedSlice, ov::op::v1) +_OPENVINO_OP_REG(Subtract, ov::op::v1) +_OPENVINO_OP_REG(Tan, 
ov::op::v0) +_OPENVINO_OP_REG(Tanh, ov::op::v0) +_OPENVINO_OP_REG(TensorIterator, ov::op::v0) +_OPENVINO_OP_REG(Tile, ov::op::v0) +_OPENVINO_OP_REG(Transpose, ov::op::v1) +_OPENVINO_OP_REG(Unsqueeze, ov::op::v0) +_OPENVINO_OP_REG(VariadicSplit, ov::op::v1) + +// New operations added in opset2 +_OPENVINO_OP_REG(BatchToSpace, ov::op::v1) +_OPENVINO_OP_REG(SpaceToBatch, ov::op::v1) + +// New operations added in opset3 +_OPENVINO_OP_REG(EmbeddingBagPackedSum, ov::op::v3) +_OPENVINO_OP_REG(EmbeddingSegmentsSum, ov::op::v3) +_OPENVINO_OP_REG(EmbeddingBagOffsetsSum, ov::op::v3) +_OPENVINO_OP_REG(GRUCell, ov::op::v3) +_OPENVINO_OP_REG(NonZero, ov::op::v3) +_OPENVINO_OP_REG(RNNCell, ov::op::v0) +_OPENVINO_OP_REG(ScatterUpdate, ov::op::v3) +_OPENVINO_OP_REG(ShuffleChannels, ov::op::v0) _OPENVINO_OP_REG(ShapeOf, ov::op::v3) +// New operations added in opset4 +_OPENVINO_OP_REG(Acosh, ov::op::v3) +_OPENVINO_OP_REG(Asinh, ov::op::v3) +_OPENVINO_OP_REG(Atanh, ov::op::v3) +_OPENVINO_OP_REG(CTCLoss, ov::op::v4) +_OPENVINO_OP_REG(HSwish, ov::op::v4) +_OPENVINO_OP_REG(Mish, ov::op::v4) +_OPENVINO_OP_REG(ReduceL1, ov::op::v4) +_OPENVINO_OP_REG(ReduceL2, ov::op::v4) +_OPENVINO_OP_REG(SoftPlus, ov::op::v4) +_OPENVINO_OP_REG(Swish, ov::op::v4) + +// New operations added in opset5 +_OPENVINO_OP_REG(GRUSequence, ov::op::v5) +_OPENVINO_OP_REG(HSigmoid, ov::op::v5) +_OPENVINO_OP_REG(LogSoftmax, ov::op::v5) +_OPENVINO_OP_REG(Loop, ov::op::v5) +_OPENVINO_OP_REG(LSTMSequence, ov::op::v5) +_OPENVINO_OP_REG(RNNSequence, ov::op::v5) +_OPENVINO_OP_REG(Round, ov::op::v5) + +// New operations added in opset6 +_OPENVINO_OP_REG(CTCGreedyDecoderSeqLen, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronDetectionOutput, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronGenerateProposalsSingleImage, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronPriorGridGenerator, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronROIFeatureExtractor, ov::op::v6) +_OPENVINO_OP_REG(ExperimentalDetectronTopKROIs, 
ov::op::v6) +_OPENVINO_OP_REG(GatherElements, ov::op::v6) +_OPENVINO_OP_REG(MVN, ov::op::v6) +_OPENVINO_OP_REG(Assign, ov::op::v6) // new version +_OPENVINO_OP_REG(ReadValue, ov::op::v6) // new version + +// New operations added in opset7 +_OPENVINO_OP_REG(DFT, ov::op::v7) +_OPENVINO_OP_REG(Einsum, ov::op::v7) +_OPENVINO_OP_REG(Gelu, ov::op::v7) +_OPENVINO_OP_REG(IDFT, ov::op::v7) +_OPENVINO_OP_REG(Roll, ov::op::v7) + +// New operations added in opset8 +_OPENVINO_OP_REG(Gather, ov::op::v8) +_OPENVINO_OP_REG(GatherND, ov::op::v8) +_OPENVINO_OP_REG(AdaptiveAvgPool, ov::op::v8) +_OPENVINO_OP_REG(AdaptiveMaxPool, ov::op::v8) +_OPENVINO_OP_REG(DeformableConvolution, ov::op::v8) +_OPENVINO_OP_REG(DetectionOutput, ov::op::v8) +_OPENVINO_OP_REG(I420toBGR, ov::op::v8) +_OPENVINO_OP_REG(I420toRGB, ov::op::v8) +_OPENVINO_OP_REG(MatrixNms, ov::op::v8) +_OPENVINO_OP_REG(MaxPool, ov::op::v14) +_OPENVINO_OP_REG(NV12toBGR, ov::op::v8) +_OPENVINO_OP_REG(NV12toRGB, ov::op::v8) +_OPENVINO_OP_REG(RandomUniform, ov::op::v8) +_OPENVINO_OP_REG(Slice, ov::op::v8) +_OPENVINO_OP_REG(Softmax, ov::op::v8) +_OPENVINO_OP_REG(If, ov::op::v8) +_OPENVINO_OP_REG(PriorBox, ov::op::v8) + +// New operations added in opset9 +_OPENVINO_OP_REG(IRDFT, ov::op::v9) +_OPENVINO_OP_REG(RDFT, ov::op::v9) +_OPENVINO_OP_REG(Eye, ov::op::v9) +_OPENVINO_OP_REG(NonMaxSuppression, ov::op::v9) +_OPENVINO_OP_REG(ROIAlign, ov::op::v9) +_OPENVINO_OP_REG(SoftSign, ov::op::v9) +_OPENVINO_OP_REG(GenerateProposals, ov::op::v9) +_OPENVINO_OP_REG(MulticlassNms, ov::op::v9) + +// New operations added in opset10 +_OPENVINO_OP_REG(IsFinite, ov::op::v10) +_OPENVINO_OP_REG(IsInf, ov::op::v10) +_OPENVINO_OP_REG(IsNaN, ov::op::v10) +_OPENVINO_OP_REG(Unique, ov::op::v10) + +// New operations added in opset11 +_OPENVINO_OP_REG(Interpolate, ov::op::v11) +_OPENVINO_OP_REG(TopK, ov::op::v11) + +// New operations added in opset12 +_OPENVINO_OP_REG(GroupNormalization, ov::op::v12) +_OPENVINO_OP_REG(Pad, ov::op::v12) 
+_OPENVINO_OP_REG(ScatterElementsUpdate, ov::op::v12) + +// New operations added in opset13 +_OPENVINO_OP_REG(BitwiseAnd, ov::op::v13) +_OPENVINO_OP_REG(BitwiseNot, ov::op::v13) +_OPENVINO_OP_REG(BitwiseOr, ov::op::v13) +_OPENVINO_OP_REG(BitwiseXor, ov::op::v13) +_OPENVINO_OP_REG(NMSRotated, ov::op::v13) +_OPENVINO_OP_REG(Multinomial, ov::op::v13) +_OPENVINO_OP_REG(ScaledDotProductAttention, ov::op::v13) +_OPENVINO_OP_REG(FakeConvert, ov::op::v13) + +// New operations added in opset14 +_OPENVINO_OP_REG(ConvertPromoteTypes, ov::op::v14) +_OPENVINO_OP_REG(Inverse, ov::op::v14) + // New operations added in opset15 _OPENVINO_OP_REG(ROIAlignRotated, ov::op::v15) _OPENVINO_OP_REG(ScatterNDUpdate, ov::op::v15) diff --git a/src/core/include/openvino/opsets/opset16.hpp b/src/core/include/openvino/opsets/opset16.hpp new file mode 100644 index 00000000000000..0e2c6c5452f661 --- /dev/null +++ b/src/core/include/openvino/opsets/opset16.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/ops.hpp" + +namespace ov { +namespace opset16 { +#define _OPENVINO_OP_REG(a, b) using b::a; +#include "openvino/opsets/opset16_tbl.hpp" +#undef _OPENVINO_OP_REG +} // namespace opset16 +} // namespace ov diff --git a/src/core/include/openvino/opsets/opset16_tbl.hpp b/src/core/include/openvino/opsets/opset16_tbl.hpp new file mode 100644 index 00000000000000..4038aa17b72750 --- /dev/null +++ b/src/core/include/openvino/opsets/opset16_tbl.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef _OPENVINO_OP_REG +# warning "_OPENVINO_OP_REG not defined" +# define _OPENVINO_OP_REG(x, y) +#endif + +// Previous opsets operators +// TODO (ticket: 156877): Add remaining operators from opset15 at the end of opset16 development +_OPENVINO_OP_REG(Parameter, ov::op::v0) +_OPENVINO_OP_REG(Convert, ov::op::v0) +_OPENVINO_OP_REG(ShapeOf, ov::op::v3) 
+ +// New operations added in opset16 +_OPENVINO_OP_REG(Identity, ov::op::v16) diff --git a/src/core/include/openvino/opsets/opset1_tbl.hpp b/src/core/include/openvino/opsets/opset1_tbl.hpp index 089df5896e664c..3008e450065926 100644 --- a/src/core/include/openvino/opsets/opset1_tbl.hpp +++ b/src/core/include/openvino/opsets/opset1_tbl.hpp @@ -88,7 +88,6 @@ _OPENVINO_OP_REG(LogicalOr, ov::op::v1) _OPENVINO_OP_REG(LogicalXor, ov::op::v1) _OPENVINO_OP_REG(LRN, ov::op::v0) _OPENVINO_OP_REG(LSTMCell, ov::op::v0) -_OPENVINO_OP_REG(LSTMSequence, ov::op::v0) _OPENVINO_OP_REG(MatMul, ov::op::v0) _OPENVINO_OP_REG(MaxPool, ov::op::v1) _OPENVINO_OP_REG(Maximum, ov::op::v1) diff --git a/src/core/include/openvino/opsets/opset2.hpp b/src/core/include/openvino/opsets/opset2.hpp index 3c0888c0a88487..bcdaa6a1bc354b 100644 --- a/src/core/include/openvino/opsets/opset2.hpp +++ b/src/core/include/openvino/opsets/opset2.hpp @@ -9,9 +9,7 @@ namespace ov { namespace opset2 { #define _OPENVINO_OP_REG(a, b) using b::a; -OPENVINO_SUPPRESS_DEPRECATED_START #include "openvino/opsets/opset2_tbl.hpp" -OPENVINO_SUPPRESS_DEPRECATED_END #undef _OPENVINO_OP_REG } // namespace opset2 } // namespace ov diff --git a/src/core/include/openvino/opsets/opset2_tbl.hpp b/src/core/include/openvino/opsets/opset2_tbl.hpp index 1d7c55ea74549e..e14887aaced94b 100644 --- a/src/core/include/openvino/opsets/opset2_tbl.hpp +++ b/src/core/include/openvino/opsets/opset2_tbl.hpp @@ -57,7 +57,6 @@ _OPENVINO_OP_REG(LogicalOr, ov::op::v1) _OPENVINO_OP_REG(LogicalXor, ov::op::v1) _OPENVINO_OP_REG(LRN, ov::op::v0) _OPENVINO_OP_REG(LSTMCell, ov::op::v0) -_OPENVINO_OP_REG(LSTMSequence, ov::op::v0) _OPENVINO_OP_REG(MatMul, ov::op::v0) _OPENVINO_OP_REG(MaxPool, ov::op::v1) _OPENVINO_OP_REG(Maximum, ov::op::v1) diff --git a/src/core/include/openvino/opsets/opset3.hpp b/src/core/include/openvino/opsets/opset3.hpp index 29cd23b89c1111..a4e99a21e1c643 100644 --- a/src/core/include/openvino/opsets/opset3.hpp +++ 
b/src/core/include/openvino/opsets/opset3.hpp @@ -9,9 +9,7 @@ namespace ov { namespace opset3 { #define _OPENVINO_OP_REG(a, b) using b::a; -OPENVINO_SUPPRESS_DEPRECATED_START #include "openvino/opsets/opset3_tbl.hpp" -OPENVINO_SUPPRESS_DEPRECATED_END #undef _OPENVINO_OP_REG } // namespace opset3 } // namespace ov diff --git a/src/core/include/openvino/opsets/opset3_tbl.hpp b/src/core/include/openvino/opsets/opset3_tbl.hpp index 3c34a6999f2696..5c16414a02e987 100644 --- a/src/core/include/openvino/opsets/opset3_tbl.hpp +++ b/src/core/include/openvino/opsets/opset3_tbl.hpp @@ -60,7 +60,6 @@ _OPENVINO_OP_REG(LogicalOr, ov::op::v1) _OPENVINO_OP_REG(LogicalXor, ov::op::v1) _OPENVINO_OP_REG(LRN, ov::op::v0) _OPENVINO_OP_REG(LSTMCell, ov::op::v0) -_OPENVINO_OP_REG(LSTMSequence, ov::op::v0) _OPENVINO_OP_REG(MatMul, ov::op::v0) _OPENVINO_OP_REG(MaxPool, ov::op::v1) _OPENVINO_OP_REG(Maximum, ov::op::v1) diff --git a/src/core/reference/CMakeLists.txt b/src/core/reference/CMakeLists.txt index f7874964233cf5..b62cf02f23f4f1 100644 --- a/src/core/reference/CMakeLists.txt +++ b/src/core/reference/CMakeLists.txt @@ -50,9 +50,6 @@ target_include_directories(${TARGET_NAME} PUBLIC $ $) -target_include_directories(${TARGET_NAME} SYSTEM PRIVATE - $:$>>) - find_package(Threads REQUIRED) target_link_libraries(${TARGET_NAME} PRIVATE Threads::Threads openvino::core::dev) diff --git a/src/core/reference/include/openvino/reference/identity.hpp b/src/core/reference/include/openvino/reference/identity.hpp new file mode 100644 index 00000000000000..7ff48b76560e09 --- /dev/null +++ b/src/core/reference/include/openvino/reference/identity.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { +namespace reference { +/** + * @brief Identity operation computes the identity of the input tensor. + * + * @param input Input matrix (matrices) pointer. + * @param output Output matrix (matrices) pointer. 
+ * @param size_in_bytes Size of the input tensor in bytes. + **/ +static inline void identity(const char* input, char* output, const size_t size_in_bytes) { + if (input == output) { + return; + } else { + std::memcpy(output, input, size_in_bytes); + } +} +} // namespace reference +} // namespace ov diff --git a/src/core/reference/include/openvino/reference/search_sorted.hpp b/src/core/reference/include/openvino/reference/search_sorted.hpp index ca5361c388c621..7ea8ec1078a2a1 100644 --- a/src/core/reference/include/openvino/reference/search_sorted.hpp +++ b/src/core/reference/include/openvino/reference/search_sorted.hpp @@ -4,6 +4,7 @@ #pragma once +#include "openvino/core/parallel.hpp" #include "openvino/core/shape.hpp" #include "openvino/reference/utils/coordinate_index.hpp" #include "openvino/reference/utils/coordinate_transform.hpp" @@ -30,7 +31,13 @@ void search_sorted(const T* sorted, }; } - for (const Coordinate& values_coord : values_transform) { + const size_t size = shape_size(values_shape); + + auto func = [&](size_t i) { + auto it = values_transform.begin(); + it += i; + const Coordinate& values_coord = *it; + const auto values_index = coordinate_index(values_coord, values_shape); const T value = values[values_index]; @@ -48,7 +55,9 @@ void search_sorted(const T* sorted, const ptrdiff_t sorted_index = (idx_ptr - sorted) - sorted_index_begin; out[values_index] = static_cast(sorted_index); - } + }; + + ov::parallel_for(size, func); } } // namespace reference diff --git a/src/core/reference/include/openvino/reference/sequences.hpp b/src/core/reference/include/openvino/reference/sequences.hpp index af1946fcbe72c9..42b4083793b63b 100644 --- a/src/core/reference/include/openvino/reference/sequences.hpp +++ b/src/core/reference/include/openvino/reference/sequences.hpp @@ -380,131 +380,6 @@ void lstm_sequence(const char* X, } } -template -void lstm_sequence_v1(const char* X, - const Shape& X_shape, - const char* H, - const Shape& H_shape, - const char* C, - 
const Shape& C_shape, - const char* seq_lengths, - const Shape& seq_lengths_shape, - const char* W, - const Shape& W_shape, - const char* R, - const Shape& R_shape, - const char* B, - const Shape& B_shape, - const char* P, - const Shape& P_shape, - char* Y, - char* Ho, - char* Co, - const std::string& activation_f, - const std::string& activation_g, - const std::string& activation_h, - float clip, - const ov::op::LSTMWeightsFormat weight_format, - bool input_forget, - op::RecurrentSequenceDirection direction) { - OutputVector results; - if (direction == op::RecurrentSequenceDirection::FORWARD || direction == op::RecurrentSequenceDirection::REVERSE) { - CellArgs args; - args.activation_f = activation_f; - args.activation_g = activation_g; - args.activation_h = activation_h; - args.clip = clip; - args.weight_format = weight_format; - args.input_forget = input_forget; - std::vector inputs = {X, seq_lengths, H, C, W, R, B, P}; - std::vector outputs = {Y, Ho, Co}; - std::vector shapes = {X_shape, seq_lengths_shape, H_shape, C_shape, W_shape, R_shape, B_shape, P_shape}; - cell_pass(CellType::LSTM_v1, - inputs, - shapes, - outputs, - args, - direction == op::RecurrentSequenceDirection::REVERSE); - } else if (direction == op::RecurrentSequenceDirection::BIDIRECTIONAL) { - // Split bidirectional case to forward + reverse passes. 
- // split inputs - std::vector> H_split(2, std::vector(sizeof(T) * shape_size(H_shape) / 2)); - std::vector> C_split(2, std::vector(sizeof(T) * shape_size(C_shape) / 2)); - std::vector> W_split(2, std::vector(sizeof(T) * shape_size(W_shape) / 2)); - std::vector> R_split(2, std::vector(sizeof(T) * shape_size(R_shape) / 2)); - std::vector> B_split(2, std::vector(sizeof(T) * shape_size(B_shape) / 2)); - std::vector> P_split(2, std::vector(sizeof(T) * shape_size(P_shape) / 2)); - char* h_pointers[2] = {H_split[0].data(), H_split[1].data()}; - char* c_pointers[2] = {C_split[0].data(), C_split[1].data()}; - char* w_pointers[2] = {W_split[0].data(), W_split[1].data()}; - char* r_pointers[2] = {R_split[0].data(), R_split[1].data()}; - char* b_pointers[2] = {B_split[0].data(), B_split[1].data()}; - char* p_pointers[2] = {P_split[0].data(), P_split[1].data()}; - reference::split(H, H_shape, sizeof(T), 1, 2, h_pointers); - reference::split(C, C_shape, sizeof(T), 1, 2, c_pointers); - reference::split(W, W_shape, sizeof(T), 0, 2, w_pointers); - reference::split(R, R_shape, sizeof(T), 0, 2, r_pointers); - reference::split(B, B_shape, sizeof(T), 0, 2, b_pointers); - reference::split(P, P_shape, sizeof(T), 0, 2, p_pointers); - std::vector forward_res_y(sizeof(T) * H_shape[0] * H_shape[2] * X_shape[1]); - std::vector reverse_res_y(sizeof(T) * H_shape[0] * H_shape[2] * X_shape[1]); - std::vector> forward_res(2, std::vector(sizeof(T) * H_shape[0] * H_shape[2])); - std::vector> reverse_res(2, std::vector(sizeof(T) * H_shape[0] * H_shape[2])); - - CellArgs args; - args.activation_f = activation_f; - args.activation_g = activation_g; - args.activation_h = activation_h; - args.clip = clip; - args.weight_format = weight_format; - args.input_forget = input_forget; - std::vector shapes = {X_shape, seq_lengths_shape, H_shape, C_shape, W_shape, R_shape, B_shape, P_shape}; - // update H,C,W,R,B,P shapes after split - shapes[2][1] = 1; - shapes[3][1] = 1; - for (size_t i = 4; i < 
shapes.size(); ++i) { - shapes[i][0] = 1; - } - // forward pass - cell_pass( - CellType::LSTM_v1, - {X, seq_lengths, h_pointers[0], c_pointers[0], w_pointers[0], r_pointers[0], b_pointers[0], p_pointers[0]}, - shapes, - {forward_res_y.data(), forward_res[0].data(), forward_res[1].data()}, - args, - false); - // reverse pass - cell_pass( - CellType::LSTM_v1, - {X, seq_lengths, h_pointers[1], c_pointers[1], w_pointers[1], r_pointers[1], b_pointers[1], p_pointers[1]}, - shapes, - {reverse_res_y.data(), reverse_res[0].data(), reverse_res[1].data()}, - args, - true); - - // Stack together respective outputs from both forward and reverse passes. - std::vector in_shapes_y = {{H_shape[0], 1, X_shape[1], H_shape[2]}, - {H_shape[0], 1, X_shape[1], H_shape[2]}}; - std::vector in_shapes_h_c = {{H_shape[0], 1, H_shape[2]}, {H_shape[0], 1, H_shape[2]}}; - Shape output_shape_y{H_shape[0], 2, X_shape[1], H_shape[2]}; - Shape output_shape_h_c{H_shape[0], 2, H_shape[2]}; - - reference::concat({forward_res_y.data(), reverse_res_y.data()}, Y, in_shapes_y, output_shape_y, 1, sizeof(T)); - reference::concat({forward_res[0].data(), reverse_res[0].data()}, - Ho, - in_shapes_h_c, - output_shape_h_c, - 1, - sizeof(T)); - reference::concat({forward_res[1].data(), reverse_res[1].data()}, - Co, - in_shapes_h_c, - output_shape_h_c, - 1, - sizeof(T)); - } -} - template void gru_sequence(const char* X, const Shape& X_shape, diff --git a/src/core/reference/src/op/jit_generator.hpp b/src/core/reference/include/openvino/reference/utils/jit_generator.hpp similarity index 59% rename from src/core/reference/src/op/jit_generator.hpp rename to src/core/reference/include/openvino/reference/utils/jit_generator.hpp index b4b9cd7a60c23f..539f686020049c 100644 --- a/src/core/reference/src/op/jit_generator.hpp +++ b/src/core/reference/include/openvino/reference/utils/jit_generator.hpp @@ -15,7 +15,6 @@ namespace ov { namespace reference { namespace jit { -#ifdef XBYAK64 static const Xbyak::Operand::Code 
abi_save_gpr_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, @@ -23,28 +22,42 @@ static const Xbyak::Operand::Code abi_save_gpr_regs[] = { Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, -# ifdef _WIN32 +#ifdef _WIN32 Xbyak::Operand::RDI, Xbyak::Operand::RSI, -# endif +#endif }; -# ifdef _WIN32 -# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RCX) // RCX -# else -# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RDI) // RDI -# endif -#endif // XBYAK64 +#ifdef _WIN32 +# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RCX) // RCX +#else +# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RDI) // RDI +#endif -class Generator : public Xbyak::CodeGenerator { - static constexpr size_t xmm_len = 16; +typedef enum { + isa_any, + sse42, + avx, + avx2, + avx512_common, + avx512_core, + avx512_core_vnni, + avx512_mic, + avx512_mic_4ops, + avx512_core_bf16, + avx512_vpopcnt, + fp16, + pclmulqdq, + vpclmulqdq +} cpu_isa_t; +class Generator : public Xbyak::CodeGenerator { #ifdef _WIN32 - static constexpr size_t xmm_to_preserve_start = 6; - static constexpr size_t xmm_to_preserve = 10; + static constexpr size_t xmm_to_preserve_start = 6llu; + static constexpr size_t xmm_to_preserve = 10llu; #else - static constexpr size_t xmm_to_preserve_start = 0; - static constexpr size_t xmm_to_preserve = 0; + static constexpr size_t xmm_to_preserve_start = 0lu; + static constexpr size_t xmm_to_preserve = 0lu; #endif static const size_t num_abi_save_gpr_regs = sizeof(abi_save_gpr_regs) / sizeof(abi_save_gpr_regs[0]); @@ -52,29 +65,19 @@ class Generator : public Xbyak::CodeGenerator { const Xbyak::Reg64 reg_EVEX_max_8b_offt; static constexpr int EVEX_max_8b_offt = 0x200; + size_t m_vlen = ymm_len; public: - const Xbyak::Reg64 param = abi_param1; + static constexpr size_t xmm_len = 16lu; + static constexpr size_t ymm_len = 32lu; + static constexpr size_t zmm_len = 64lu; - typedef enum { - isa_any, - sse42, - avx, - avx2, - avx512_common, - avx512_core, - avx512_core_vnni, - 
avx512_mic, - avx512_mic_4ops, - avx512_core_bf16, - avx512_vpopcnt, - fp16 - } cpu_isa_t; + const Xbyak::Reg64 param = abi_param1; static bool mayiuse(const cpu_isa_t cpu_isa); static bool is_x64(); - Generator(void* code_ptr = nullptr, size_t code_size = 16 * 1024); + Generator(cpu_isa_t isa = avx2, void* code_ptr = nullptr, size_t code_size = 16lu * 1024lu); void preamble(); void postamble(); @@ -85,7 +88,12 @@ class Generator : public Xbyak::CodeGenerator { template void copy(const Xbyak::Reg64& dst, const Xbyak::Reg64& src, const Xbyak::Reg64& size); + + size_t get_vlen() { + return m_vlen; + } }; + } // namespace jit } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/utils/registers_pool.hpp b/src/core/reference/include/openvino/reference/utils/registers_pool.hpp new file mode 100644 index 00000000000000..62dfe01ec4ef1d --- /dev/null +++ b/src/core/reference/include/openvino/reference/utils/registers_pool.hpp @@ -0,0 +1,247 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "openvino/core/except.hpp" +#include "openvino/reference/utils/jit_generator.hpp" +namespace ov { +namespace reference { +namespace jit { + +class RegistersPool { +public: + using Ptr = std::shared_ptr; + using WeakPtr = std::weak_ptr; + static constexpr int any_idx = -1; + + template + class Reg { + friend class RegistersPool; + + public: + Reg() {} + Reg(const RegistersPool::Ptr& regPool) { + initialize(regPool); + } + Reg(const RegistersPool::Ptr& regPool, int requested_idx) { + initialize(regPool, requested_idx); + } + ~Reg() { + release(); + } + Reg& operator=(Reg&& other) noexcept { + release(); + reg = other.reg; + regPool = std::move(other.regPool); + return *this; + } + Reg(Reg&& other) noexcept : reg(other.reg), regPool(std::move(other.regPool)) {} + operator TReg&() { + ensure_valid(); + return reg; + } + operator const TReg&() 
const { + ensure_valid(); + return reg; + } + operator Xbyak::RegExp() const { + ensure_valid(); + return reg; + } + int getIdx() const { + ensure_valid(); + return reg.getIdx(); + } + friend Xbyak::RegExp operator+(const Reg& lhs, const Xbyak::RegExp& rhs) { + lhs.ensure_valid(); + return lhs.operator Xbyak::RegExp() + rhs; + } + void release() { + if (auto pool = regPool.lock()) { + pool->return_to_pool(reg); + regPool.reset(); + } + } + bool is_initialized() const { + return !regPool.expired(); + } + + private: + void ensure_valid() const { + if (!is_initialized()) { + OPENVINO_THROW("RegistersPool::Reg is either not initialized or released"); + } + } + + void initialize(const RegistersPool::Ptr& pool, int requested_idx = any_idx) { + release(); + reg = TReg(pool->template get_free(requested_idx)); + regPool = pool; + } + + private: + TReg reg; + RegistersPool::WeakPtr regPool; + }; + + virtual ~RegistersPool() { + check_unique_and_update(false); + } + + template + static Ptr create(std::initializer_list regsToExclude); + + static Ptr create(cpu_isa_t isa, std::initializer_list regsToExclude); + + template + size_t count_free() const { + if (std::is_base_of::value) { + return m_simd_set.count_unused(); + } else if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + return m_general_set.count_unused(); + } else if (std::is_same::value) { + return count_unused_opmask(); + } + } + +protected: + class PhysicalSet { + public: + PhysicalSet(int size) : m_is_free_index_vector(size, true) {} + + void set_as_used(size_t reg_idx); + + void set_as_unused(size_t reg_idx); + + size_t get_unused(size_t requested_idx); + + void exclude(Xbyak::Reg reg) { + m_is_free_index_vector.at(reg.getIdx()) = false; + } + + size_t count_unused() const; + + private: + size_t get_first_free_index(); + + private: + std::vector m_is_free_index_vector; + }; + + virtual int get_free_opmask(int requested_idx) { + OPENVINO_THROW("get_free_opmask: The 
Opmask is not supported in current instruction set"); + } + virtual void return_opmask_to_pool(int idx) { + OPENVINO_THROW("return_opmask_to_pool: The Opmask is not supported in current instruction set"); + } + virtual size_t count_unused_opmask() const { + OPENVINO_THROW("count_unused_opmask: The Opmask is not supported in current instruction set"); + } + + RegistersPool(int simd_registers_number); + + RegistersPool(std::initializer_list regsToExclude, int simd_registers_number); + +private: + template + int get_free(int requested_idx) { + if (std::is_base_of::value) { + auto idx = m_simd_set.get_unused(requested_idx); + m_simd_set.set_as_used(idx); + return static_cast(idx); + } else if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + auto idx = m_general_set.get_unused(requested_idx); + m_general_set.set_as_used(idx); + return static_cast(idx); + } else if (std::is_same::value) { + return get_free_opmask(requested_idx); + } + } + + template + void return_to_pool(const TReg& reg) { + if (std::is_base_of::value) { + m_simd_set.set_as_unused(reg.getIdx()); + } else if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + m_general_set.set_as_unused(reg.getIdx()); + } else if (std::is_same::value) { + return_opmask_to_pool(reg.getIdx()); + } + } + + void check_unique_and_update(bool isCtor = true); + + PhysicalSet m_general_set; + PhysicalSet m_simd_set; +}; + +template +class IsaRegistersPool : public RegistersPool { +public: + IsaRegistersPool(std::initializer_list regsToExclude) : RegistersPool(regsToExclude, 32) {} +}; + +template <> +class IsaRegistersPool : public RegistersPool { +public: + IsaRegistersPool() : RegistersPool(32) { + m_opmask_set.exclude( + Xbyak::Opmask(0)); // the Opmask(0) has special meaning for some instructions, like gather instruction + } + + IsaRegistersPool(std::initializer_list regsToExclude) : RegistersPool(regsToExclude, 32) { + for (auto& reg 
: regsToExclude) { + if (reg.isOPMASK()) { + m_opmask_set.exclude(reg); + } + } + } + + int get_free_opmask(int requested_idx) override { + auto idx = static_cast(m_opmask_set.get_unused(requested_idx)); + m_opmask_set.set_as_used(idx); + return idx; + } + + void return_opmask_to_pool(int idx) override { + m_opmask_set.set_as_unused(idx); + } + + size_t count_unused_opmask() const override { + return m_opmask_set.count_unused(); + } + +protected: + PhysicalSet m_opmask_set{8}; +}; + +template +RegistersPool::Ptr RegistersPool::create(std::initializer_list regsToExclude) { + return std::make_shared>(regsToExclude); +} + +inline RegistersPool::Ptr RegistersPool::create(cpu_isa_t isa, std::initializer_list regsToExclude) { +#define ISA_SWITCH_CASE(isa) \ + case isa: \ + return std::make_shared>(regsToExclude); + switch (isa) { + ISA_SWITCH_CASE(avx2) + ISA_SWITCH_CASE(avx512_core) + default: + OPENVINO_THROW("Invalid isa argument in RegistersPool::create(): ", isa); + } +#undef ISA_SWITCH_CASE +} + +} // namespace jit +} // namespace reference +} // namespace ov diff --git a/src/core/reference/src/op/convert.cpp b/src/core/reference/src/op/convert.cpp index 5054121b5615c0..034734afd8fd2a 100644 --- a/src/core/reference/src/op/convert.cpp +++ b/src/core/reference/src/op/convert.cpp @@ -7,7 +7,7 @@ #include "openvino/reference/utils/convert_util.hpp" #ifdef OV_CORE_USE_XBYAK_JIT -# include "jit_generator.hpp" +# include "openvino/reference/utils/jit_generator.hpp" #endif #ifdef OV_CORE_USE_INTRINSICS @@ -256,7 +256,7 @@ class jit_convert_array : public jit::Generator { template static fn_t get() { - if (is_x64() && mayiuse(avx) && mayiuse(avx2) && mayiuse(fp16)) { + if (is_x64() && mayiuse(jit::avx) && mayiuse(jit::avx2) && mayiuse(jit::fp16)) { static const jit_convert_array::context_t context{{sizeof(src_t), &jit::Generator::copy}, {sizeof(dst_t), &jit::Generator::copy}, jit_convert_vec, @@ -460,7 +460,7 @@ class jit_count_out_of_range : public jit::Generator { 
template static fn_t get() { - if (is_x64() && mayiuse(avx2)) { + if (is_x64() && mayiuse(jit::avx2)) { static const jit_count_out_of_range::context_t context{ {sizeof(data_t), &jit::Generator::copy}, jit_count_out_of_range_vec_prepare, diff --git a/src/core/reference/src/op/multiclass_nms.cpp b/src/core/reference/src/op/multiclass_nms.cpp index 3f0b6c8634ea9d..88d8d405c618af 100644 --- a/src/core/reference/src/op/multiclass_nms.cpp +++ b/src/core/reference/src/op/multiclass_nms.cpp @@ -48,7 +48,6 @@ static float intersectionOverUnion(const Rectangle& boxI, const Rectangle& boxJ, // start: start index along axis "M" template std::vector slice_image(const T* data, const Shape& data_shape, const int64_t start, const int64_t item_num) { - std::vector slice_data; const auto class_num = data_shape[0]; const auto item_size = (data_shape.size() == 3) ? data_shape[2] : 1; @@ -57,7 +56,7 @@ std::vector slice_image(const T* data, const Shape& data_shape, const int64_t "Invaid inputs as it is trying to slice data out of range."); const auto row_num = item_num * item_size; - slice_data.reserve(class_num * row_num); + std::vector slice_data(static_cast(class_num * row_num)); T* item_data = slice_data.data(); T* src = const_cast(data + start * item_size); for (size_t i = 0; i < class_num; i++) { diff --git a/src/core/reference/src/op/stft.cpp b/src/core/reference/src/op/stft.cpp index d3a93db0e1e937..6fd5583be21a75 100644 --- a/src/core/reference/src/op/stft.cpp +++ b/src/core/reference/src/op/stft.cpp @@ -21,8 +21,9 @@ void stft(const float* signal, const int64_t frame_size, const int64_t frame_step, const bool transpose_frames) { - constexpr size_t signal_axis = 1; - const auto batch_size = signal_shape[0]; + const auto is_signal_1D = signal_shape.size() == 1; + const size_t batch_size = is_signal_1D ? 1 : signal_shape[0]; + const size_t signal_axis = is_signal_1D ? 
0 : 1; const auto signal_length = signal_shape[signal_axis]; const auto num_frames = static_cast((signal_length - frame_size) / frame_step) + 1; const auto frame_size_dim = static_cast(frame_size); diff --git a/src/core/reference/src/op/jit_generator.cpp b/src/core/reference/src/utils/jit_generator.cpp similarity index 91% rename from src/core/reference/src/op/jit_generator.cpp rename to src/core/reference/src/utils/jit_generator.cpp index 7d7da06d5da8d5..39dc31c0033f9f 100644 --- a/src/core/reference/src/op/jit_generator.cpp +++ b/src/core/reference/src/utils/jit_generator.cpp @@ -11,9 +11,10 @@ # endif # include -# include "jit_generator.hpp" +# include "openvino/core/except.hpp" # include "openvino/core/type/bfloat16.hpp" # include "openvino/core/type/float16.hpp" +# include "openvino/reference/utils/jit_generator.hpp" namespace ov { namespace reference { @@ -51,6 +52,10 @@ bool Generator::mayiuse(const cpu_isa_t cpu_isa) { return true && cpu.has(Cpu::tAVX512_VPOPCNTDQ); case fp16: return cpu.has(Cpu::tF16C); + case cpu_isa_t::pclmulqdq: + return cpu.has(Cpu::tPCLMULQDQ); + case cpu_isa_t::vpclmulqdq: + return cpu.has(Cpu::tVPCLMULQDQ); case isa_any: return true; } @@ -60,10 +65,18 @@ bool Generator::mayiuse(const cpu_isa_t cpu_isa) { bool Generator::is_x64() { return sizeof(void*) == 8; } -Generator::Generator(void* code_ptr, size_t code_size) +Generator::Generator(cpu_isa_t isa, void* code_ptr, size_t code_size) : Xbyak::CodeGenerator(code_size, code_ptr), size_of_abi_save_regs(num_abi_save_gpr_regs * rax.getBit() / 8 + xmm_to_preserve * xmm_len), - reg_EVEX_max_8b_offt(rbp) {} + reg_EVEX_max_8b_offt(rbp) { + if (isa == avx512_core) { + m_vlen = zmm_len; + } else if (isa == avx2) { + m_vlen = ymm_len; + } else { + OPENVINO_THROW("Unsupported isa: ", isa); + } +} void Generator::preamble() { if (xmm_to_preserve) { diff --git a/src/core/reference/src/utils/registers_pool.cpp b/src/core/reference/src/utils/registers_pool.cpp new file mode 100644 index 
00000000000000..413fdcc3ed83cf --- /dev/null +++ b/src/core/reference/src/utils/registers_pool.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/visibility.hpp" + +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +# include "openvino/reference/utils/registers_pool.hpp" + +namespace ov { +namespace reference { +namespace jit { + +RegistersPool::RegistersPool(int simd_registers_number) : m_general_set(16), m_simd_set(simd_registers_number) { + check_unique_and_update(); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RSP)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RAX)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RCX)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RDI)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RBP)); +} + +RegistersPool::RegistersPool(std::initializer_list regsToExclude, int simd_registers_number) + : m_general_set(16), + m_simd_set(simd_registers_number) { + check_unique_and_update(); + for (auto& reg : regsToExclude) { + if (reg.isXMM() || reg.isYMM() || reg.isZMM()) { + m_simd_set.exclude(reg); + } else if (reg.isREG()) { + m_general_set.exclude(reg); + } + } + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RSP)); +} + +void RegistersPool::check_unique_and_update(bool is_ctor) { + static thread_local bool is_created = false; + if (is_ctor) { + if (is_created) { + OPENVINO_THROW("There should be only one instance of RegistersPool per thread"); + } + is_created = true; + } else { + is_created = false; + } +} + +void RegistersPool::PhysicalSet::set_as_used(size_t reg_idx) { + if (reg_idx >= m_is_free_index_vector.size()) { + OPENVINO_THROW("reg_idx is out of bounds in RegistersPool::PhysicalSet::set_as_used()"); + } + if (!m_is_free_index_vector[reg_idx]) { + OPENVINO_THROW("Inconsistency in RegistersPool::PhysicalSet::set_as_used()"); + } + m_is_free_index_vector[reg_idx] = false; +} + +void 
RegistersPool::PhysicalSet::set_as_unused(size_t reg_idx) { + if (reg_idx >= m_is_free_index_vector.size()) { + OPENVINO_THROW("reg_idx is out of bounds in RegistersPool::PhysicalSet::set_as_used()"); + } + if (m_is_free_index_vector[reg_idx]) { + OPENVINO_THROW("Inconsistency in RegistersPool::PhysicalSet::set_as_unused()"); + } + m_is_free_index_vector[reg_idx] = true; +} + +size_t RegistersPool::PhysicalSet::get_unused(size_t requested_idx) { + if (requested_idx == static_cast(any_idx)) { + return get_first_free_index(); + } else { + if (requested_idx >= m_is_free_index_vector.size()) { + OPENVINO_THROW("requested_idx is out of bounds in RegistersPool::PhysicalSet::get_unused()"); + } + if (!m_is_free_index_vector[requested_idx]) { + OPENVINO_THROW("The register with index #", requested_idx, " already used in the RegistersPool"); + } + return requested_idx; + } +} + +size_t RegistersPool::PhysicalSet::count_unused() const { + size_t count = 0; + for (const auto& isFree : m_is_free_index_vector) { + if (isFree) { + ++count; + } + } + return count; +} + +size_t RegistersPool::PhysicalSet::get_first_free_index() { + for (size_t c = 0; c < m_is_free_index_vector.size(); ++c) { + if (m_is_free_index_vector[c]) { + return c; + } + } + OPENVINO_THROW("Not enough registers in the RegistersPool"); +} + +} // namespace jit +} // namespace reference +} // namespace ov + +#endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64 diff --git a/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp b/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp index c2b5f1389dee7b..8b0daeb27663fb 100644 --- a/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp +++ b/src/core/shape_inference/include/lstm_sequence_shape_inference.hpp @@ -7,32 +7,6 @@ namespace ov { namespace op { -namespace v0 { -OPENVINO_SUPPRESS_DEPRECATED_START -template -std::vector> shape_infer(const LSTMSequence* op, const std::vector& input_shapes) { - 
OPENVINO_SUPPRESS_DEPRECATED_END - constexpr auto num_gates = 4; - constexpr auto num_state_nodes = 2; - const auto output_shapes = - rnn::seq_base_shape_infer(op, input_shapes, num_gates, num_state_nodes, op->get_direction()); - // Validate rank and dimension for P input (the input doesn't exists in the next version of LSTM or other RNN based - // ops) The checks are compatible with the original restrictions of the v0::LSTMSequence - const auto& hidden_size = output_shapes[0][3]; - if (input_shapes.size() > 7 && input_shapes[7].is_static() && hidden_size.is_static()) { - const auto& p_pshape = input_shapes[7]; - NODE_VALIDATION_CHECK(op, p_pshape.rank().compatible(2), "Input tensor P should have rank equal 2."); - NODE_VALIDATION_CHECK(op, - p_pshape[1].compatible(hidden_size * (num_gates - 1)), - "Inorrect shape of P input. Second dimension is: ", - p_pshape[1], - ", expected: ", - hidden_size.get_length() * (num_gates - 1), - "."); - } - return output_shapes; -} -} // namespace v0 namespace v5 { template std::vector> shape_infer(const LSTMSequence* op, const std::vector& input_shapes) { diff --git a/src/core/shape_inference/include/range_shape_inference.hpp b/src/core/shape_inference/include/range_shape_inference.hpp index 3be56a4543a7dd..5d754810d9b80a 100644 --- a/src/core/shape_inference/include/range_shape_inference.hpp +++ b/src/core/shape_inference/include/range_shape_inference.hpp @@ -11,6 +11,31 @@ namespace op { namespace ShapeInferRange { +template ::value>::type* = nullptr> +void symbol_propagation(const Node* op, + std::vector& output_shapes, + const double& start, + const double& step, + bool start_val, + bool step_val) { + output_shapes[0] = ov::PartialShape::dynamic(1); + if (op->get_input_size() == 3 && step_val && step == 1) { + auto start_symbol = op->input_value(0).get_tensor().get_value_symbol(); + auto stop_symbol = op->input_value(1).get_tensor().get_value_symbol(); + if (start_val && start == 0 && !stop_symbol.empty()) { + 
output_shapes[0][0].set_symbol(stop_symbol[0]); + } + } +} + +template ::value>::type* = nullptr> +void symbol_propagation(const Node* op, + std::vector& output_shapes, + const double& start, + const double& step, + bool start_val, + bool step_val) {} + template > std::vector range_shape_infer(const Node* op, const std::vector& input_shapes, @@ -35,12 +60,18 @@ std::vector range_shape_infer(const Node* op, NODE_VALIDATION_CHECK(op, start_val->size() == 1); start = (*start_val)[0]; NODE_VALIDATION_CHECK(op, std::isfinite(start) && !std::isnan(start), "'start' cannot be nan or infinite."); + if (output_is_integral) + // all inputs must be casted to output_type before the rounding for casting values are done towards zero + start = std::trunc(start); } if (stop_val) { NODE_VALIDATION_CHECK(op, stop_val->size() == 1); stop = (*stop_val)[0]; NODE_VALIDATION_CHECK(op, std::isfinite(stop) && !std::isnan(stop), "'stop' cannot be nan or infinite."); + if (output_is_integral) + // all inputs must be casted to output_type before the rounding for casting values are done towards zero + stop = std::trunc(stop); } if (step_val) { @@ -52,18 +83,13 @@ std::vector range_shape_infer(const Node* op, NODE_VALIDATION_CHECK(op, std::isfinite(step) && !std::isnan(step) && step != 0, "'step' cannot be zero, nan, or infinite."); + if (output_is_integral) + // all inputs must be casted to output_type before the rounding for casting values are done towards zero + step = std::trunc(step); } auto output_shapes = std::vector(1); if (start_val && stop_val && step_val) { - // all inputs must be casted to output_type before - // the rounding for casting values are done towards zero - if (output_is_integral) { - start = std::trunc(start); - stop = std::trunc(stop); - step = std::trunc(step); - } - // the number of elements is: max(ceil((stop − start) / step), 0) double span; if ((step > 0 && start >= stop) || (step < 0 && start <= stop)) { @@ -76,7 +102,7 @@ std::vector range_shape_infer(const Node* 
op, output_shapes[0] = TRShape{static_cast(strided)}; } else { - output_shapes[0] = ov::PartialShape::dynamic(1); + symbol_propagation(op, output_shapes, start, step, start_val, step_val); } return output_shapes; } diff --git a/src/core/shape_inference/include/reshape_shape_inference.hpp b/src/core/shape_inference/include/reshape_shape_inference.hpp index 273d803ca8c82d..d3b9acf54289b6 100644 --- a/src/core/shape_inference/include/reshape_shape_inference.hpp +++ b/src/core/shape_inference/include/reshape_shape_inference.hpp @@ -239,6 +239,20 @@ void set_pattern_symbols(const Node* const op, TShape& shape) { template ::value>::type* = nullptr> void set_pattern_symbols(const Node* const, TShape&) {} +/** @brief Deducing symbol relations: number of elements in the tensor doesn't change after the Reshape operation. */ +template ::type, Dimension>::value>::type* = nullptr> +void deduce_symbol_relations(const Product& product) { + auto dyn_in = product.get_dynamic_in(); + auto dyn_out = product.get_dynamic_out(); + dyn_in.merge(dyn_in, dyn_in, dyn_out); +} + +/** @brief Shapes other than PartialShape have no symbols. 
*/ +template ::type, Dimension>::value>::type* = nullptr> +void deduce_symbol_relations(const Product& product) {} + } // namespace reshape namespace v1 { @@ -342,6 +356,10 @@ std::vector shape_infer(const Reshape* op, !dim::is_empty(minus_one_dim), "Non-'-1' output dimensions do not evenly divide the input dimensions"); } + } else { + if (product.get_static_in() == product.get_static_out() && product.get_static_in() != 0) { + deduce_symbol_relations(product); + } } if (input_shape.is_static() && output_shape.is_static()) { diff --git a/src/core/shape_inference/include/squeeze_shape_inference.hpp b/src/core/shape_inference/include/squeeze_shape_inference.hpp index ee71b5452db1c3..31eeea5d36a9ea 100644 --- a/src/core/shape_inference/include/squeeze_shape_inference.hpp +++ b/src/core/shape_inference/include/squeeze_shape_inference.hpp @@ -11,6 +11,117 @@ namespace ov { namespace op { namespace v0 { +template > +std::vector shape_infer(const Squeeze* op, + const std::vector& input_shapes, + const ITensorAccessor& ta = make_tensor_accessor()) { + using DimType = typename T::value_type; + + const auto number_of_inputs = input_shapes.size(); + OPENVINO_ASSERT(!input_shapes.empty()); + + const auto& arg_shape = input_shapes[0]; + const auto& arg_rank = arg_shape.rank(); + auto output_shapes = std::vector(1); + auto& output_shape = output_shapes[0]; + + std::unique_ptr> unique_axes; + + if (number_of_inputs == 1) { + unique_axes.reset(new std::set()); + } else if (number_of_inputs == 2) { + const auto& axes_shape = input_shapes[1]; + NODE_VALIDATION_CHECK(op, + axes_shape.is_dynamic() || ov::util::is_rank_compatible_any_of(axes_shape.rank(), {0, 1}), + "Second input (axes) should not be of rank higher than 1. 
Got: ", + axes_shape.rank().get_length()); + + std::vector axes; + if (arg_rank.is_static() && axes_shape.is_static()) { + if (auto axes = get_input_const_data_as(op, 1, ta)) { + // The values of `axes` input are known + ov::util::try_normalize_axes(*axes, arg_rank, *op); + unique_axes.reset(new std::set(axes->cbegin(), axes->cend())); + } else if (arg_rank.get_length() > 0 && shape_size(axes_shape.to_shape()) == 1) { + // The `axes` input is a single element tensor which is unique by definition, deducing output rank + const auto has_squeezable_dim = + std::any_of(arg_shape.cbegin(), arg_shape.cend(), [](const DimType& dim) { + return dim.compatible(1); + }); + if (has_squeezable_dim) { + output_shape = PartialShape::dynamic(arg_rank.get_length() - 1); + } else { + output_shape = arg_shape; + } + return output_shapes; + } + } + } else { + // Invalid number of inputs, empty error message for backward compatibility. + NODE_VALIDATION_CHECK(op, false); + } + + if (arg_rank.is_static() && (unique_axes != nullptr)) { + output_shape.resize(0); + if (unique_axes->empty()) { + // if only first input provided or axes are empty remove all dimensions equal to 1. + if (std::any_of(arg_shape.cbegin(), arg_shape.cend(), [](const DimType& d) { + return d.is_dynamic() && d.compatible(1); + })) { + // we are unsure if dynamic dimensions would be equal to 1 or not, so we set dynamic output rank + output_shape = PartialShape::dynamic(); + return output_shapes; + } else { + std::copy_if(arg_shape.cbegin(), + arg_shape.cend(), + std::back_inserter(output_shape), + [](const DimType& dim) { + return !dim.compatible(1); + }); + } + } else { + int64_t idx = 0; + auto rm_axis_iter = unique_axes->cbegin(); + auto rm_axis_end = unique_axes->cend(); + + // Returns true if dimension not squeezable on axis from input axes. 
+ const auto not_squeezable_at_axis = [&rm_axis_iter, &rm_axis_end, &idx](const DimType& dim) { + if ((rm_axis_iter != rm_axis_end) && (*rm_axis_iter == idx++)) { + ++rm_axis_iter; + // Ignore: Pointed by axis, but not squeezable + return !dim.compatible(1); + } else { + return true; + } + }; + + std::copy_if(arg_shape.cbegin(), + arg_shape.cend(), + std::back_inserter(output_shape), + not_squeezable_at_axis); + } + } else { + output_shape = PartialShape::dynamic(); + } + return output_shapes; +} +} // namespace v0 + +namespace v15 { +template +bool apply_allow_axis_skip(const ov::op::v15::Squeeze* const op, + const std::unique_ptr>& unique_axes, + const T& arg_shape) { + using DimType = typename T::value_type; + int64_t i{-1}; + + return op->get_allow_axis_skip() && + std::any_of(arg_shape.cbegin(), arg_shape.cend(), [&unique_axes, &i](const DimType& d) { + ++i; + // Squeeze result with dynamic rank if 1 is in range of selected dynamic dimension. + return d.is_dynamic() && d.compatible(1) && unique_axes->find(i) != unique_axes->end(); + }); +} /** * \brief Do Squeeze shape inference. @@ -59,7 +170,7 @@ std::vector shape_infer(const Squeeze* op, return dim.compatible(1); }); if (has_squeezable_dim) { - output_shape = PartialShape::dynamic(arg_rank.get_length() - 1); + output_shape = PartialShape::dynamic(); } else { output_shape = arg_shape; } @@ -71,7 +182,9 @@ std::vector shape_infer(const Squeeze* op, NODE_VALIDATION_CHECK(op, false); } - if (arg_rank.is_static() && (unique_axes != nullptr)) { + if (!arg_rank.is_static() || (unique_axes == nullptr) || apply_allow_axis_skip(op, unique_axes, arg_shape)) { + output_shape = PartialShape::dynamic(); + } else if (arg_rank.is_static() && (unique_axes != nullptr)) { output_shape.resize(0); if (unique_axes->empty()) { // if only first input provided or axes are empty remove all dimensions equal to 1. 
@@ -115,6 +228,6 @@ std::vector shape_infer(const Squeeze* op, } return output_shapes; } -} // namespace v0 +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/shape_inference/include/stft_shape_inference.hpp b/src/core/shape_inference/include/stft_shape_inference.hpp index 9605a8fa797d8d..41abf6640ddb96 100644 --- a/src/core/shape_inference/include/stft_shape_inference.hpp +++ b/src/core/shape_inference/include/stft_shape_inference.hpp @@ -16,6 +16,8 @@ std::vector shape_infer(const STFT* op, const std::vector& input_shapes, const ITensorAccessor& ta = make_tensor_accessor()) { using TDim = typename TRShape::value_type; + using TDimVal = typename TDim::value_type; + NODE_VALIDATION_CHECK(op, input_shapes.size() == 4); const auto& signal_shape = input_shapes[0]; @@ -23,10 +25,11 @@ std::vector shape_infer(const STFT* op, const auto& frame_size_shape = input_shapes[2]; const auto& frame_step_shape = input_shapes[3]; + const auto signal_shape_rank = signal_shape.rank(); NODE_SHAPE_INFER_CHECK(op, input_shapes, - signal_shape.rank().compatible(2), - "The shape of signal must be 2D [batch, signal_size]."); + signal_shape_rank.compatible(1) || signal_shape_rank.compatible(2), + "The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]."); NODE_SHAPE_INFER_CHECK(op, input_shapes, window_shape.rank().compatible(1), @@ -40,25 +43,36 @@ std::vector shape_infer(const STFT* op, frame_step_shape.rank().compatible(0), "The shape of frame_step must be a scalar."); + if (signal_shape_rank.is_dynamic()) { + return {signal_shape}; + } + const auto frame_size = get_input_const_data_as(op, 2, ta); const auto frame_step = get_input_const_data_as(op, 3, ta); - if (signal_shape.rank().is_dynamic()) { - return {signal_shape}; - } else if (!frame_size || !frame_step) { - return {TRShape{signal_shape[0], -1, -1, 2}}; + const auto is_signal_1D = signal_shape.size() == 1; + if (!frame_size || !frame_step) { + if (is_signal_1D) { + return 
{TRShape{TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + } else { + return {TRShape{signal_shape[0], TDim(ov::util::dim::inf_bound), TDim(ov::util::dim::inf_bound), 2}}; + } } const auto& frame_size_val = (*frame_size)[0]; const auto& frame_step_val = (*frame_step)[0]; + const TDim& signal_dim = is_signal_1D ? signal_shape[0] : signal_shape[1]; + const bool is_frame_size_in_range = + 0 < frame_size_val && (signal_dim.is_static() ? static_cast(frame_size_val) <= signal_dim.get_length() + : frame_size_val <= signal_dim.get_interval().get_max_val()); NODE_SHAPE_INFER_CHECK(op, input_shapes, - 0 < frame_size_val && frame_size_val < signal_shape[1].get_interval().get_max_val(), + is_frame_size_in_range, "Provided frame size is ", frame_size_val, " but must be in range [1, ", - signal_shape[1], + signal_dim, "]."); NODE_SHAPE_INFER_CHECK(op, @@ -68,17 +82,18 @@ std::vector shape_infer(const STFT* op, frame_step_val, " but must be greater than zero."); + const bool is_win_shape_correct = + window_shape.is_dynamic() || (TDimVal{0} < window_shape[0].get_length() && + window_shape[0].get_length() <= static_cast(frame_size_val)); NODE_SHAPE_INFER_CHECK(op, input_shapes, - window_shape.is_dynamic() || - (0 < window_shape[0].get_length() && window_shape[0].get_length() <= frame_size_val), + is_win_shape_correct, "Window input dimension must be in range [1, ", frame_size_val, "]."); - const auto& batch_dim = signal_shape[0]; - const TDim frame_size_dim = TDim{frame_size_val}; - const TDim signal_frame_size_diff = signal_shape[1] - frame_size_dim; + const TDim frame_size_dim = static_cast(frame_size_val); + const TDim signal_frame_size_diff = signal_dim - frame_size_dim; TDim fft_samples_dim = (frame_size_val / 2) + 1; // Divsion opeartor for static Dimension of PartialShape can return non static dimension and ceil instead of floor @@ -87,9 +102,13 @@ std::vector shape_infer(const STFT* op, std::vector output_shapes; if (op->get_transpose_frames()) { - 
output_shapes.emplace_back(TRShape{batch_dim, std::move(fft_samples_dim), std::move(frames_dim), 2}); + output_shapes.emplace_back(TRShape{std::move(fft_samples_dim), std::move(frames_dim), 2}); } else { - output_shapes.emplace_back(TRShape{batch_dim, std::move(frames_dim), std::move(fft_samples_dim), 2}); + output_shapes.emplace_back(TRShape{std::move(frames_dim), std::move(fft_samples_dim), 2}); + } + if (!is_signal_1D) { + const auto& batch_dim = signal_shape[0]; + output_shapes[0].insert(output_shapes[0].begin(), batch_dim); } return output_shapes; } diff --git a/src/core/src/descriptor/tensor.cpp b/src/core/src/descriptor/tensor.cpp index 31509605003fdb..ae3f7c6e77cd4f 100644 --- a/src/core/src/descriptor/tensor.cpp +++ b/src/core/src/descriptor/tensor.cpp @@ -116,18 +116,9 @@ void ov::descriptor::Tensor::clone_from(const ov::descriptor::Tensor& old) { m_lower_value = old.get_lower_value(); m_upper_value = old.get_upper_value(); m_value_symbol = old.get_value_symbol(); - m_legacy_name = old.m_legacy_name; m_rt_info = old.get_rt_info(); } -std::string ov::descriptor::get_ov_tensor_legacy_name(const ov::descriptor::Tensor& tensor) { - return tensor.m_legacy_name; -} - -void ov::descriptor::set_ov_tensor_legacy_name(ov::descriptor::Tensor& tensor, const std::string& tensor_name) { - tensor.m_legacy_name = tensor_name; -} - void ov::descriptor::set_tensor_type(ov::descriptor::Tensor& tensor, const element::Type& element_type, const PartialShape& pshape) { @@ -148,10 +139,6 @@ std::ostream& ov::descriptor::operator<<(std::ostream& out, const ov::descriptor names += ", "; names += name; } - OPENVINO_SUPPRESS_DEPRECATED_START - if (names.empty()) - names = get_ov_tensor_legacy_name(tensor); - OPENVINO_SUPPRESS_DEPRECATED_END out << "Tensor(" << names << ")"; return out; } diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index 17780f7751d52e..4a8f4598f55cc3 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -273,36 
+273,16 @@ bool replace_output_update_name(Output output, const Output& replace }); }; - bool preserve_legacy_output_name = false; if (has_result_consumers(output)) { - preserve_legacy_output_name = true; if (output.get_node()->get_output_size() != 1 || replacement.get_node()->get_output_size() != 1 || is_type(replacement.get_node()) || has_result_consumers(replacement)) { return false; } - } - - OPENVINO_SUPPRESS_DEPRECATED_START - if (preserve_legacy_output_name) { replacement.get_node()->set_friendly_name(output.get_node()->get_friendly_name()); - // Update output tensor name - const auto& output_tensor_name = ov::descriptor::get_ov_tensor_legacy_name(output.get_tensor()); - if (!output_tensor_name.empty()) { - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), output_tensor_name); - } else { - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), output.get_node()->get_friendly_name()); - } } - // Save replacement tensor name before replacement as they will be overridden by the output tensor name - const auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(replacement.get_tensor()); - output.replace(replacement); - // Restore back original replacement tensor name - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), tensor_name); - OPENVINO_SUPPRESS_DEPRECATED_END - copy_runtime_info({replacement.get_node_shared_ptr(), output.get_node_shared_ptr()}, replacement.get_node_shared_ptr()); return true; diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 0341e4477f4cfb..ec9197a5a337cb 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -157,10 +157,6 @@ std::shared_ptr ov::Node::copy_with_new_inputs( } for (size_t i = 0; i < get_output_size(); i++) { clone->get_output_tensor(i).set_names(get_output_tensor(i).get_names()); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(clone->get_output_tensor(i), - 
ov::descriptor::get_ov_tensor_legacy_name(get_output_tensor(i))); - OPENVINO_SUPPRESS_DEPRECATED_END } return clone; } @@ -696,8 +692,8 @@ bool ov::Node::evaluate_symbol(TensorSymbolVector& output_symbols) const { return false; } -bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& input_values) { - OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Node::constant_fold"); +bool ov::Node::can_constant_fold(const OutputVector& input_values) const { + OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Node::can_constant_fold"); if (is_const_fold_disabled()) { return false; @@ -707,8 +703,16 @@ bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& in bool all_constants = std::all_of(input_values.begin(), input_values.end(), [](const Output& input) { return ov::as_type_ptr(input.get_node_shared_ptr()); }); - if (!all_constants) + + return all_constants; +} + +bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& input_values) { + OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Node::constant_fold"); + + if (!Node::can_constant_fold(input_values)) { return false; + } NodeVector nodes; TensorVector input_tensors; diff --git a/src/core/src/node_output.cpp b/src/core/src/node_output.cpp index 3608abe13f3953..a2ea865b2ec84e 100644 --- a/src/core/src/node_output.cpp +++ b/src/core/src/node_output.cpp @@ -73,25 +73,6 @@ void Output::replace(const Output& replacement) { input.replace_source_output(replacement); } replacement.get_tensor_ptr()->add_names(get_tensor_ptr()->get_names()); - OPENVINO_SUPPRESS_DEPRECATED_START - // In legacy API we rely on output port tensor name and use it as an input or output name for the model - // Due to m_name is just a string, and we can't store multiple aliases for single output port we have to - // handle two situations during replacement: - // 1. When we replace consumers to Parameter output port we can't change its name, so we skip this part - // 2. 
In other cases when we replace consumers to another output port we should set name. For example: - // if we eliminate Node2 from Node1->Node2->Result we have to set Node2 output port name to Node1 - // output port name, so the output name for model won't be changed. - // But there are some cases when output name can not be preserved, so the replacement shouldn't be used: - // 1. Parameter->Node->Result - if we eliminate Node we will lose output name - // 2. Node1-->Node2->Result - if we eliminate Node2 we will lose Result output name - // `->Result - // In both of these cases please use replace_output_update_name() method which automatically prevents the - // replacement for cases when we can not preserve input/output names of model. - if (!is_type(replacement.get_node())) { - ov::descriptor::set_ov_tensor_legacy_name(replacement.get_tensor(), - ov::descriptor::get_ov_tensor_legacy_name(get_tensor())); - } - OPENVINO_SUPPRESS_DEPRECATED_END ov::copy_output_runtime_info({*this, replacement}, {replacement}); } diff --git a/src/core/src/op/assign.cpp b/src/core/src/op/assign.cpp index bf6e55c11b1d39..7798d4328049af 100644 --- a/src/core/src/op/assign.cpp +++ b/src/core/src/op/assign.cpp @@ -134,7 +134,7 @@ bool Assign::has_evaluate() const { return true; } -bool Assign::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Assign::can_constant_fold(const OutputVector& input_values) const { return false; } } // namespace v6 diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 95df6379ba284e..e06718ef4e1fd5 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -663,7 +663,7 @@ bool Constant::evaluate_upper(TensorVector& outputs) const { return evaluate(outputs, {}); } -bool Constant::constant_fold(OutputVector&, const OutputVector&) { +bool Constant::can_constant_fold(const OutputVector& input_values) const { return false; } diff --git a/src/core/src/op/convert_like.cpp 
b/src/core/src/op/convert_like.cpp index 3dc0159bb556be..4ae4ea982f8cd9 100644 --- a/src/core/src/op/convert_like.cpp +++ b/src/core/src/op/convert_like.cpp @@ -29,9 +29,13 @@ std::shared_ptr ConvertLike::clone_with_new_inputs(const OutputVector& new return std::make_shared(new_args.at(0), new_args.at(1)); } +bool ConvertLike::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled(); +} + bool ConvertLike::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_OP_SCOPE(v1_ConvertLike_constant_fold); - if (is_const_fold_disabled()) { + if (!can_constant_fold(input_values)) { return false; } diff --git a/src/core/src/op/identity.cpp b/src/core/src/op/identity.cpp new file mode 100644 index 00000000000000..9a18278beca913 --- /dev/null +++ b/src/core/src/op/identity.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/identity.hpp" + +#include + +#include "itt.hpp" +#include "openvino/core/attribute_visitor.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/util/op_types.hpp" +#include "openvino/reference/identity.hpp" + +namespace ov { +namespace op { +namespace v16 { + +Identity::Identity(const Output& data) : Op({data}) { + constructor_validate_and_infer_types(); +} + +bool Identity::Identity::visit_attributes(AttributeVisitor& visitor) { + OV_OP_SCOPE(v16_Identity_visit_attributes); + return true; +} + +void Identity::Identity::validate_and_infer_types() { + OV_OP_SCOPE(v16_Identity_validate_and_infer_types); + + NODE_VALIDATION_CHECK(this, get_input_size() == 1); + + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); +} + +std::shared_ptr Identity::Identity::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v16_Identity_clone_with_new_inputs); + check_new_args_count(this, new_args); + + return std::make_shared(new_args.at(0)); +} +} // namespace v16 
+} // namespace op +} // namespace ov diff --git a/src/core/src/op/lstm_sequence.cpp b/src/core/src/op/lstm_sequence.cpp index 4d9afa58f469b3..3f70b74e5ef22a 100644 --- a/src/core/src/op/lstm_sequence.cpp +++ b/src/core/src/op/lstm_sequence.cpp @@ -10,155 +10,6 @@ #include "openvino/op/util/recurrent_sequence.hpp" namespace ov { -OPENVINO_SUPPRESS_DEPRECATED_START -op::v0::LSTMSequence::LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const Output& P, - const std::int64_t hidden_size, - const LSTMSequence::direction lstm_direction, - LSTMWeightsFormat weights_format, - const std::vector activations_alpha, - const std::vector activations_beta, - const std::vector activations, - const float clip_threshold, - const bool input_forget) - : RNNCellBase({X, initial_hidden_state, initial_cell_state, sequence_lengths, W, R, B, P}, - hidden_size, - clip_threshold, - activations, - activations_alpha, - activations_beta), - m_direction(lstm_direction), - m_input_forget(input_forget), - m_weights_format(weights_format) { - constructor_validate_and_infer_types(); -} - -op::v0::LSTMSequence::LSTMSequence(const Output& X, - const Output& initial_hidden_state, - const Output& initial_cell_state, - const Output& sequence_lengths, - const Output& W, - const Output& R, - const Output& B, - const std::int64_t hidden_size, - const LSTMSequence::direction lstm_direction, - LSTMWeightsFormat weights_format, - const std::vector& activations_alpha, - const std::vector& activations_beta, - const std::vector& activations, - const float clip_threshold, - const bool input_forget) - : op::v0::LSTMSequence( - X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - Constant::create(element::f32, - Shape{(lstm_direction == LSTMSequence::direction::BIDIRECTIONAL ? 
2UL : 1UL), - 3UL * static_cast(hidden_size)}, - std::vector{0.f}), - hidden_size, - lstm_direction, - weights_format, - activations_alpha, - activations_beta, - activations, - clip_threshold, - input_forget) {} - -bool op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_LSTMSequence_visit_attributes); - visitor.on_attribute("hidden_size", m_hidden_size); - visitor.on_attribute("activations", m_activations); - visitor.on_attribute("activations_alpha", m_activations_alpha); - visitor.on_attribute("activations_beta", m_activations_beta); - visitor.on_attribute("clip", m_clip); - visitor.on_attribute("direction", m_direction); - - visitor.on_attribute("input_forget", m_input_forget); - visitor.on_attribute("weights_format", m_weights_format); - return true; -} - -std::shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& new_args) const { - OV_OP_SCOPE(v0_LSTMSequence_clone_with_new_inputs); - check_new_args_count(this, new_args); - if (new_args.size() == 8) { - return std::make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - new_args.at(7), // P - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip, - m_input_forget); - } else if (new_args.size() == 7) { - return std::make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip, - m_input_forget); - } else { - OPENVINO_THROW("Incorrect number of new arguments"); - } -} - -void op::v0::LSTMSequence::validate_and_infer_types() { - 
OV_OP_SCOPE(v0_LSTMSequence_validate_and_infer_types); - auto result_et = element::dynamic; - - // Validate input types and save result for output type - NODE_VALIDATION_CHECK(this, - element::Type::merge(result_et, result_et, get_input_element_type(0)) && - element::Type::merge(result_et, result_et, get_input_element_type(1)) && - element::Type::merge(result_et, result_et, get_input_element_type(2)) && - element::Type::merge(result_et, result_et, get_input_element_type(4)) && - element::Type::merge(result_et, result_et, get_input_element_type(5)) && - element::Type::merge(result_et, result_et, get_input_element_type(6)), - "Element types for X, initial_hidden_state, initial_cell_state, W, R and B inputs do " - "not match."); - - // Mark inputs which are relevant to output parameters - for (size_t i = 0; i <= 6; ++i) - set_input_is_relevant_to_shape(i); - - const auto input_shapes = ov::util::get_node_input_partial_shapes(*this); - auto output_shapes = shape_infer(this, input_shapes); - - // Set output size, type and shape - set_output_type(0, result_et, output_shapes[0]); - set_output_type(1, result_et, output_shapes[1]); - set_output_type(2, result_et, output_shapes[2]); -} -OPENVINO_SUPPRESS_DEPRECATED_END - bool op::v5::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v5_LSTMSequence_visit_attributes); visitor.on_attribute("direction", m_direction); diff --git a/src/core/src/op/paged_attention.cpp b/src/core/src/op/paged_attention.cpp index 261b0ce1c47605..cdcb66e86ee33e 100644 --- a/src/core/src/op/paged_attention.cpp +++ b/src/core/src/op/paged_attention.cpp @@ -4,6 +4,7 @@ #include "openvino/op/paged_attention.hpp" +#include "dimension_util.hpp" #include "itt.hpp" #include "openvino/op/op.hpp" @@ -146,10 +147,33 @@ void PagedAttentionExtension::validate_and_infer_types() { get_input_element_type(12), "."); + // value head_size may be not same with key + auto out_ps = get_input_partial_shape(0); + const auto& key_ps = 
get_input_partial_shape(1); + const auto& value_ps = get_input_partial_shape(2); + if (out_ps.rank().is_static()) { + if (key_ps.rank().is_static() && value_ps.rank().is_static() && key_ps[1].is_static()) { + // The dim of out_ps[1] should be `num_heads * v_head_size`, it can be got from: + // because: + // q: query_ps[1] = num_heads * head_size + // k: key_ps[1] = num_kv_heads * head_size + // v: value_ps[1] = num_kv_heads * v_head_size + // therefore: + // q * v / k = (num_heads * head_size) * (num_kv_heads * v_head_size) / + // (num_kv_heads * head_size) = num_heads * v_head_size + out_ps[1] = out_ps[1] * value_ps[1] / key_ps[1].get_length(); + NODE_VALIDATION_CHECK(this, + !ov::util::dim::is_empty(out_ps[1]), + "The last dimension of output should not be empty."); + } else { + out_ps[1] = Dimension::dynamic(); + } + } + if (m_output_type[0] == ov::element::undefined) { - set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); + set_output_type(0, get_input_element_type(0), out_ps); } else { - set_output_type(0, m_output_type[0], get_input_partial_shape(0)); + set_output_type(0, m_output_type[0], out_ps); } if (m_output_type[1] == ov::element::undefined) { diff --git a/src/core/src/op/random_uniform.cpp b/src/core/src/op/random_uniform.cpp index e62be4d26afc58..9aafed881086b6 100644 --- a/src/core/src/op/random_uniform.cpp +++ b/src/core/src/op/random_uniform.cpp @@ -88,7 +88,7 @@ std::shared_ptr RandomUniform::clone_with_new_inputs(const OutputVector& n } /// \return Turns off constant folding for RandomUniform operation. 
-bool RandomUniform::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool RandomUniform::can_constant_fold(const OutputVector& input_values) const { return false; } diff --git a/src/core/src/op/read_value.cpp b/src/core/src/op/read_value.cpp index 162cb5067bc00a..0d63456a3b8348 100644 --- a/src/core/src/op/read_value.cpp +++ b/src/core/src/op/read_value.cpp @@ -176,7 +176,7 @@ bool ReadValue::has_evaluate() const { return true; } -bool ReadValue::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool ReadValue::can_constant_fold(const OutputVector& input_values) const { return false; } } // namespace v6 diff --git a/src/core/src/op/reshape.cpp b/src/core/src/op/reshape.cpp index ab0e0a0c17cbde..477e210f574269 100644 --- a/src/core/src/op/reshape.cpp +++ b/src/core/src/op/reshape.cpp @@ -97,7 +97,7 @@ bool Reshape::evaluate_symbol(TensorSymbolVector& output_symbols) const { } bool Reshape::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { + if (!can_constant_fold(inputs_values)) { return false; } @@ -108,6 +108,10 @@ bool Reshape::constant_fold(OutputVector& output_values, const OutputVector& inp return false; } } + +bool Reshape::can_constant_fold(const OutputVector& input_values) const { + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} } // namespace v1 } // namespace op } // namespace ov diff --git a/src/core/src/op/result.cpp b/src/core/src/op/result.cpp index 3667e5ff22b422..237d6bd7a2084a 100644 --- a/src/core/src/op/result.cpp +++ b/src/core/src/op/result.cpp @@ -67,7 +67,7 @@ bool Result::has_evaluate() const { return true; } -bool Result::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Result::can_constant_fold(const OutputVector& input_values) const { return false; } diff --git a/src/core/src/op/search_sorted.cpp 
b/src/core/src/op/search_sorted.cpp index 8b9bb012b27106..65b5ff31861d8e 100644 --- a/src/core/src/op/search_sorted.cpp +++ b/src/core/src/op/search_sorted.cpp @@ -12,9 +12,13 @@ namespace ov { namespace op { namespace v15 { -SearchSorted::SearchSorted(const Output& sorted_sequence, const Output& values, bool right_mode) +SearchSorted::SearchSorted(const Output& sorted_sequence, + const Output& values, + bool right_mode, + const element::Type& output_type) : Op({sorted_sequence, values}), - m_right_mode(right_mode) { + m_right_mode(right_mode), + m_output_type(output_type) { constructor_validate_and_infer_types(); } @@ -23,20 +27,25 @@ void SearchSorted::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_element_type(0).compatible(get_input_element_type(1)), "Sorted sequence and values must have the same element type."); + NODE_VALIDATION_CHECK(this, + m_output_type == element::i32 || m_output_type == element::i64, + "The element type of the last output can only be set to i32 or i64."); + const auto& output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); - set_output_type(0, ov::element::i64, output_shapes[0]); + set_output_type(0, m_output_type, output_shapes[0]); } bool SearchSorted::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v15_SearchSorted_visit_attributes); visitor.on_attribute("right_mode", m_right_mode); + visitor.on_attribute("output_type", m_output_type); return true; } std::shared_ptr SearchSorted::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v15_SearchSorted_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode()); + return std::make_shared(new_args.at(0), new_args.at(1), get_right_mode(), get_output_type_attr()); } } // namespace v15 } // namespace op diff --git a/src/core/src/op/shape_of.cpp b/src/core/src/op/shape_of.cpp index 293c1b5fc5a59c..9676a5704ec99c 100644 --- 
a/src/core/src/op/shape_of.cpp +++ b/src/core/src/op/shape_of.cpp @@ -168,9 +168,13 @@ bool ShapeOf::evaluate_symbol(TensorSymbolVector& output_symbols) const { return shape_of::evaluate_symbol(this, output_symbols); } +bool ShapeOf::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled() && input_values[0].get_partial_shape().is_static(); +} + bool ShapeOf::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_OP_SCOPE(v3_ShapeOf_constant_fold); - if (is_const_fold_disabled()) { + if (!can_constant_fold(input_values)) { return false; } return shape_of::constant_fold_shape_of(this, output_values[0], input_values[0]); @@ -222,9 +226,13 @@ bool ShapeOf::has_evaluate() const { } } +bool ShapeOf::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled() && input_values[0].get_partial_shape().is_static(); +} + bool ShapeOf::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_OP_SCOPE(v0_ShapeOf_constant_fold); - if (is_const_fold_disabled()) { + if (!can_constant_fold(input_values)) { return false; } return shape_of::constant_fold_shape_of(this, output_values[0], input_values[0]); diff --git a/src/core/src/op/squeeze.cpp b/src/core/src/op/squeeze.cpp index 3abc0a773192d2..b79165ca4f5543 100644 --- a/src/core/src/op/squeeze.cpp +++ b/src/core/src/op/squeeze.cpp @@ -6,31 +6,19 @@ #include -#include "bound_evaluate.hpp" #include "itt.hpp" -#include "openvino/core/validation_util.hpp" -#include "openvino/op/constant.hpp" #include "squeeze_shape_inference.hpp" namespace ov { namespace op { namespace v0 { -namespace validate { -namespace { +Squeeze::Squeeze() : util::SqueezeBase() {} -bool axes_has_and_set_bound(const Node& op) { - return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); -} -} // namespace -} // namespace validate - -Squeeze::Squeeze() : Op() {} - -Squeeze::Squeeze(const Output& data, const Output& axes) 
: Op({data, axes}) { +Squeeze::Squeeze(const Output& data, const Output& axes) : util::SqueezeBase(data, axes) { constructor_validate_and_infer_types(); } -Squeeze::Squeeze(const Output& data) : Op({data}) { +Squeeze::Squeeze(const Output& data) : util::SqueezeBase(data) { constructor_validate_and_infer_types(); } @@ -69,58 +57,68 @@ bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const return true; } -bool Squeeze::has_evaluate() const { - OV_OP_SCOPE(v0_Squeeze_has_evaluate); - const auto validate_axes_type = [](const element::Type& et) -> bool { - switch (et) { - case element::i8: - case element::i16: - case element::i32: - case element::i64: - case element::u8: - case element::u16: - case element::u32: - case element::u64: - return true; - default: - return false; - } - }; - - return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); +} // namespace v0 + +namespace v15 { +Squeeze::Squeeze() : util::SqueezeBase() {} + +Squeeze::Squeeze(const Output& data, const bool allow_axis_skip) + : util::SqueezeBase(data), + m_allow_axis_skip{allow_axis_skip} { + constructor_validate_and_infer_types(); } -bool Squeeze::evaluate_lower(TensorVector& output_values) const { - OV_OP_SCOPE(v0_Squeeze_evaluate_lower); - return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); +Squeeze::Squeeze(const Output& data, const Output& axes, const bool allow_axis_skip) + : util::SqueezeBase(data, axes), + m_allow_axis_skip{allow_axis_skip} { + constructor_validate_and_infer_types(); } -bool Squeeze::evaluate_upper(TensorVector& output_values) const { - OV_OP_SCOPE(v0_Squeeze_evaluate_upper); - return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); +std::shared_ptr Squeeze::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v15_Squeeze_clone_with_new_inputs); + check_new_args_count(this, new_args); + + switch (new_args.size()) { + 
case 1: + return std::make_shared(new_args[0], m_allow_axis_skip); + case 2: + return std::make_shared(new_args[0], new_args[1], m_allow_axis_skip); + default: + OPENVINO_THROW("Incorrect number of new arguments"); + } } -bool Squeeze::evaluate_symbol(TensorSymbolVector& output_symbols) const { - return validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); +void Squeeze::validate_and_infer_types() { + OV_OP_SCOPE(v15_Squeeze_validate_and_infer_types); + + const auto input_shapes = ov::util::get_node_input_partial_shapes(*this); + const auto output_shapes = shape_infer(this, input_shapes); + + set_output_type(0, get_input_element_type(0), output_shapes[0]); } -bool Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - OV_OP_SCOPE(v0_Squeeze_constant_fold); - if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { - return false; - } +bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v15_Squeeze_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); - if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { - const auto& shape = get_output_shape(0); - output_values[0] = std::make_shared(*data_const, shape); - return true; - } - return false; + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + outputs[0].set_shape(output_shapes.front().get_shape()); + + std::memcpy(outputs[0].data(), inputs[0].data(), outputs[0].get_byte_size()); + return true; } -bool Squeeze::is_dynamic() const { - return get_output_partial_shape(0).is_dynamic(); +bool Squeeze::visit_attributes(AttributeVisitor& visitor) { + OV_OP_SCOPE(v15_Squeeze_visit_attributes); + visitor.on_attribute("allow_axis_skip", m_allow_axis_skip); + return true; } -} // namespace v0 + +bool Squeeze::get_allow_axis_skip() const { + 
OV_OP_SCOPE(v15_Squeeze_get_allow_axis_skip); + return m_allow_axis_skip; +} +} // namespace v15 } // namespace op } // namespace ov diff --git a/src/core/src/op/strided_slice.cpp b/src/core/src/op/strided_slice.cpp index deb89fa9a531d4..83ac3dec7a5f4f 100644 --- a/src/core/src/op/strided_slice.cpp +++ b/src/core/src/op/strided_slice.cpp @@ -283,9 +283,13 @@ bool StridedSlice::evaluate_symbol(TensorSymbolVector& output_symbols) const { default_symbol_evaluator(this, {0}, output_symbols); } +bool StridedSlice::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled(); +} + bool StridedSlice::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { auto is_folded = Node::constant_fold(output_values, inputs_values); - if (!is_const_fold_disabled() && !is_folded) { + if (can_constant_fold(inputs_values) && !is_folded) { // If all ignored mask are set for all begin or end then replace this input by dummy constant // to avoid return false from `could_propagate` during bound evaluation (value of const will be ignored). 
auto get_indices_input = [&inputs_values](size_t port, const std::vector& mask) -> Output { diff --git a/src/core/src/op/unsqueeze.cpp b/src/core/src/op/unsqueeze.cpp index d199c43a2479b5..f8c14a08f70d30 100644 --- a/src/core/src/op/unsqueeze.cpp +++ b/src/core/src/op/unsqueeze.cpp @@ -77,8 +77,12 @@ bool ov::op::v0::Unsqueeze::evaluate_symbol(TensorSymbolVector& output_symbols) return ov::util::default_symbol_evaluator(this, output_symbols); } +bool ov::op::v0::Unsqueeze::can_constant_fold(const OutputVector& input_values) const { + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} + bool ov::op::v0::Unsqueeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { + if (!can_constant_fold(inputs_values)) { return false; } diff --git a/src/core/src/op/util/broadcast_base.cpp b/src/core/src/op/util/broadcast_base.cpp index 59154e45e2b37a..c2c838afeb38bd 100644 --- a/src/core/src/op/util/broadcast_base.cpp +++ b/src/core/src/op/util/broadcast_base.cpp @@ -471,3 +471,10 @@ bool ov::op::util::BroadcastBase::evaluate_upper(ov::TensorVector& output_values return false; return default_upper_bound_evaluator(this, output_values); } + +bool ov::op::util::BroadcastBase::evaluate_symbol(ov::TensorSymbolVector& output_symbols) const { + if (!input_value(1).get_tensor().has_and_set_bound() || + (get_input_size() > 2 && !input_value(2).get_tensor().has_and_set_bound())) + return false; + return default_symbol_evaluator(this, {0}, output_symbols); +} diff --git a/src/core/src/op/util/gather_base.cpp b/src/core/src/op/util/gather_base.cpp index 92e41781b1de55..dd35edf695ec16 100644 --- a/src/core/src/op/util/gather_base.cpp +++ b/src/core/src/op/util/gather_base.cpp @@ -32,10 +32,6 @@ Shape out_shape_infer(const Shape& data_shape, const Shape& indices_shape, int64 bool cf_gather_with_subgraph(OutputVector& output_values, const OutputVector& 
input_values, const PartialShape& gather_ps) { - if (gather_ps.is_dynamic() || input_values.size() != 3) { - return false; - } - const auto concat = std::dynamic_pointer_cast(input_values[0].get_node_shared_ptr()); const auto indices = std::dynamic_pointer_cast(input_values[1].get_node_shared_ptr()); const auto axis = std::dynamic_pointer_cast(input_values[2].get_node_shared_ptr()); @@ -67,7 +63,6 @@ bool cf_gather_with_subgraph(OutputVector& output_values, const auto raw_index = indices->cast_vector()[0]; const auto positive_index = ov::util::normalize(raw_index, rank); OPENVINO_ASSERT(positive_index >= 0 && positive_index < rank); - // gather takes exactly one element out of the Concat output const auto gathered_concat_input = concat_inputs[positive_index].get_source_output().get_node_shared_ptr(); // Concat inputs are 1D, resulting tensor shape depends on Gather indices @@ -77,9 +72,7 @@ bool cf_gather_with_subgraph(OutputVector& output_values, const auto axis_const = v0::Constant::create(element::i64, Shape{1}, {0}); gathered = std::make_shared(gathered_concat_input, axis_const); } - output_values[0] = gathered; - return true; } @@ -262,13 +255,19 @@ bool GatherBase::evaluate_symbol(TensorSymbolVector& output_symbols) const { return gather::have_indices_and_axis_bound_set(this) && ov::util::default_symbol_evaluator(this, output_symbols); } +bool GatherBase::can_constant_fold(const OutputVector& input_values) const { + return get_output_partial_shape(0).is_static() && input_values.size() == 3; +} + bool GatherBase::constant_fold(OutputVector& output_values, const OutputVector& input_values) { // try the regular constant folding just for the Gather node if (Node::constant_fold(output_values, input_values)) { return true; - } else { - return gather::cf_gather_with_subgraph(output_values, input_values, get_output_partial_shape(0)); } + if (!can_constant_fold(input_values)) { + return false; + } + return gather::cf_gather_with_subgraph(output_values, input_values, 
get_output_partial_shape(0)); } } // namespace util } // namespace op diff --git a/src/core/src/op/util/squeeze_base.cpp b/src/core/src/op/util/squeeze_base.cpp new file mode 100644 index 00000000000000..be5a20cbb58620 --- /dev/null +++ b/src/core/src/op/util/squeeze_base.cpp @@ -0,0 +1,91 @@ +#include "openvino/op/util/squeeze_base.hpp" + +#include "bound_evaluate.hpp" +#include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" + +namespace ov { +namespace op { + +namespace validate { +namespace { + +bool axes_has_and_set_bound(const Node& op) { + return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); +} +} // namespace +} // namespace validate + +namespace util { +SqueezeBase::SqueezeBase(const Output& data, const Output& axes) : Op({data, axes}) { + constructor_validate_and_infer_types(); +} + +SqueezeBase::SqueezeBase(const Output& data) : Op({data}) { + constructor_validate_and_infer_types(); +} + +bool SqueezeBase::has_evaluate() const { + OV_OP_SCOPE(util_SqueezeBase_has_evaluate); + const auto validate_axes_type = [](const element::Type& et) -> bool { + switch (et) { + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: + return true; + default: + return false; + } + }; + + return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); +} + +bool SqueezeBase::evaluate_lower(TensorVector& output_values) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_lower); + return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); +} + +bool SqueezeBase::evaluate_upper(TensorVector& output_values) const { + OV_OP_SCOPE(util_SqueezeBase_evaluate_upper); + return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); +} + +bool SqueezeBase::evaluate_symbol(TensorSymbolVector& output_symbols) const 
{ + OV_OP_SCOPE(util_SqueezeBase_evaluate_symbol); + return validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); +} + +bool SqueezeBase::can_constant_fold(const OutputVector& inputs_values) const { + OV_OP_SCOPE(util_SqueezeBase_can_constant_fold); + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} + +bool SqueezeBase::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { + OV_OP_SCOPE(util_SqueezeBase_constant_fold); + if (!can_constant_fold(inputs_values)) { + return false; + } + + if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { + const auto& shape = get_output_shape(0); + output_values[0] = std::make_shared(*data_const, shape); + return true; + } + return false; +} + +bool SqueezeBase::is_dynamic() const { + OV_OP_SCOPE(util_SqueezeBase_is_dynamic); + return get_output_partial_shape(0).is_dynamic(); +} + +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/src/op/util/weightless_caching_attributes.cpp b/src/core/src/op/util/weightless_caching_attributes.cpp new file mode 100644 index 00000000000000..7c540f8a3bef02 --- /dev/null +++ b/src/core/src/op/util/weightless_caching_attributes.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" + +bool ov::WeightlessCacheAttribute::is_copyable() const { + return false; +} diff --git a/src/core/src/opsets/opset.cpp b/src/core/src/opsets/opset.cpp index ca219f1c68ecd3..f2490010e9dc50 100644 --- a/src/core/src/opsets/opset.cpp +++ b/src/core/src/opsets/opset.cpp @@ -113,7 +113,8 @@ const std::map>& ov::get_availabl _OPENVINO_REG_OPSET(opset12), _OPENVINO_REG_OPSET(opset13), _OPENVINO_REG_OPSET(opset14), - _OPENVINO_REG_OPSET(opset15)}; + _OPENVINO_REG_OPSET(opset15), + _OPENVINO_REG_OPSET(opset16)}; #undef _OPENVINO_REG_OPSET 
return opset_map; } @@ -288,3 +289,14 @@ const ov::OpSet& ov::get_opset15() { }); return opset; } + +const ov::OpSet& ov::get_opset16() { + static OpSet opset; + static std::once_flag flag; + std::call_once(flag, [&]() { +#define _OPENVINO_OP_REG(NAME, NAMESPACE) opset.insert(); +#include "openvino/opsets/opset16_tbl.hpp" +#undef _OPENVINO_OP_REG + }); + return opset; +} diff --git a/src/core/src/pass/constant_folding.cpp b/src/core/src/pass/constant_folding.cpp index 3de91829f91b0c..cc1a7cea5b5add 100644 --- a/src/core/src/pass/constant_folding.cpp +++ b/src/core/src/pass/constant_folding.cpp @@ -105,6 +105,21 @@ bool ov::pass::ConstantFolding::run_on_model(const std::shared_ptr& m for (const auto& original_node : model->get_ordered_ops()) { auto node = original_node; + if (!original_node->can_constant_fold(original_node->input_values())) { + if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { + // recursively constant fold operators containing subgraphs (ie: TensorIterator, Loop) + size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size(); + for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) { + rewritten = + run_on_model(sub_graph_node->get_function(static_cast(sub_graph_ind))) || rewritten; + } + } + rewritten = restore_original_input_precision(original_node) || rewritten; + if (rewritten) { + original_node->validate_and_infer_types(); + } + continue; + } if (node_has_requires_precision_conversion_attribute(node)) { remove_requires_precision_conversion_attribute(node); node = util::convert_to_supported_precision(node.get()); @@ -143,15 +158,6 @@ bool ov::pass::ConstantFolding::run_on_model(const std::shared_ptr& m } } } else { - if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { - // recursively constant fold operators containing subgraphs (ie: TensorIterator, Loop) - size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size(); - for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; 
++sub_graph_ind) { - rewritten = - run_on_model(sub_graph_node->get_function(static_cast(sub_graph_ind))) || rewritten; - } - } - // if CF was unsuccessful remove original precision attribute from inputs bool restored = restore_original_input_precision(original_node); if (restored) { diff --git a/src/core/src/pass/low_latency.cpp b/src/core/src/pass/low_latency.cpp index 7ef4fcd5e84cd8..4b9f606b3e4b28 100644 --- a/src/core/src/pass/low_latency.cpp +++ b/src/core/src/pass/low_latency.cpp @@ -158,26 +158,7 @@ std::vector> process_sequence(const std::sha std::shared_ptr cell; std::vector> new_assigns; bool unroll = false; - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto lstm_seq_v0 = std::dynamic_pointer_cast(op)) { - unroll = need_unroll(op); - new_assigns = replace_with_memory(op, {1, 2}, m_use_const_initializer, to); - if (unroll) { - auto inputs = prepare_inputs(op, 3, to); - cell = to.make(inputs[0], - inputs[1], - inputs[2], - inputs[3], - inputs[4], - inputs[5], - lstm_seq_v0->get_hidden_size(), - lstm_seq_v0->get_activations(), - lstm_seq_v0->get_activations_alpha(), - lstm_seq_v0->get_activations_beta(), - lstm_seq_v0->get_clip_threshold()); - } - OPENVINO_SUPPRESS_DEPRECATED_END - } else if (auto lstm_seq_v5 = std::dynamic_pointer_cast(op)) { + if (auto lstm_seq_v5 = std::dynamic_pointer_cast(op)) { unroll = need_unroll(op); new_assigns = replace_with_memory(op, {1, 2}, m_use_const_initializer, to); if (unroll) { diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 409dcad066d7a6..f179630b155d22 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -23,6 +23,7 @@ #include "openvino/pass/constant_folding.hpp" #include "openvino/reference/convert.hpp" #include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/compute_hash.hpp" #include "openvino/runtime/string_aligned_buffer.hpp" #include "openvino/util/file_util.hpp" #include "pugixml.hpp" @@ -30,6 +31,18 @@ #include 
"transformations/rt_info/disable_fp16_compression.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" +namespace ov { +class OstreamHashWrapperBin final : public std::streambuf { + uint64_t m_res = 0lu; + +public: + uint64_t getResult() const { + return m_res; + } + std::streamsize xsputn(const char* s, std::streamsize n) override; +}; +} // namespace ov + namespace { // helpers template std::string join(const Container& c, const char* glue = ", ") { @@ -69,23 +82,6 @@ std::string translate_type_name(const std::string& name) { return name; } -size_t hash_combine(const void* v, int64_t size) { - constexpr auto cel_size = sizeof(size_t); - auto seed = static_cast(size); - const auto data = static_cast(v); - const auto d_end = std::next(data, size / cel_size); - // The constant value used as a magic number has been - // traditionally used e.g. in boost library's hash_combine. - // It happens to be derived from the golden ratio. - for (auto d = data; d != d_end; ++d) { - seed ^= *d + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - size_t last_bytes{0}; - std::memcpy(&last_bytes, d_end, size % cel_size); - seed ^= last_bytes + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; -} - class ConstantWriter { public: using FilePosition = int64_t; @@ -95,16 +91,18 @@ class ConstantWriter { ConstantWriter(std::ostream& bin_data, bool enable_compression = true) : m_binary_output(bin_data), m_enable_compression(enable_compression), - m_blob_offset(bin_data.tellp()) {} + m_blob_offset(bin_data.tellp()) { + m_write_hash_value = (dynamic_cast(bin_data.rdbuf())) ? 
true : false; + } FilePosition write(const char* ptr, size_t size, - size_t* new_size, + size_t& new_size, bool compress_to_fp16 = false, ov::element::Type src_type = ov::element::dynamic) { const FilePosition write_pos = m_binary_output.tellp(); const auto offset = write_pos - m_blob_offset; - *new_size = size; + new_size = size; if (!m_enable_compression) { if (!compress_to_fp16) { @@ -112,7 +110,7 @@ class ConstantWriter { } else { OPENVINO_ASSERT(size % src_type.size() == 0); auto fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size); - m_binary_output.write(fp16_buffer.get(), *new_size); + m_binary_output.write(fp16_buffer.get(), new_size); } return offset; } else { @@ -132,18 +130,24 @@ class ConstantWriter { // the same hash for {2, 2} and {0, 128} arrays. // But even strong hashing algorithms sometimes give collisions. // Therefore we always have to compare values when finding a match in the hash multimap. - const HashValue hash = hash_combine(ptr_to_write, *new_size); + const HashValue hash = ov::runtime::compute_hash(ptr_to_write, new_size); + auto found = m_hash_to_file_positions.find(hash); // iterate over all matches of the key in the multimap while (found != m_hash_to_file_positions.end()) { - if (memcmp(ptr, found->second.second, size) == 0) + if (memcmp(ptr, found->second.second, size) == 0) { return found->second.first; + } found++; } // Since fp16_compressed data will be disposed at exit point and since we cannot reread it from the ostream, // we store pointer to the original uncompressed blob. 
m_hash_to_file_positions.insert({hash, {offset, static_cast(ptr)}}); - m_binary_output.write(ptr_to_write, *new_size); + if (m_write_hash_value) { + m_binary_output.write(reinterpret_cast(&hash), sizeof(uint64_t)); + } else { + m_binary_output.write(ptr_to_write, new_size); + } } return offset; } @@ -152,17 +156,17 @@ class ConstantWriter { static std::unique_ptr compress_data_to_fp16(const char* ptr, size_t size, ov::element::Type src_type, - size_t* compressed_size) { + size_t& compressed_size) { auto num_src_elements = size / src_type.size(); - *compressed_size = num_src_elements * ov::element::f16.size(); + compressed_size = num_src_elements * ov::element::f16.size(); if (src_type == ov::element::f32) { - auto new_ptr = std::unique_ptr(new char[*compressed_size]); + auto new_ptr = std::unique_ptr(new char[compressed_size]); auto dst_data = reinterpret_cast(new_ptr.get()); auto src_data = reinterpret_cast(ptr); ov::reference::convert_from_f32_to_f16_with_clamp(src_data, dst_data, num_src_elements); return new_ptr; } else if (src_type == ov::element::f64) { - auto new_ptr = std::unique_ptr(new char[*compressed_size]); + auto new_ptr = std::unique_ptr(new char[compressed_size]); auto dst_data = reinterpret_cast(new_ptr.get()); auto src_data = reinterpret_cast(ptr); @@ -188,6 +192,7 @@ class ConstantWriter { ConstWritePositions m_hash_to_file_positions; std::ostream& m_binary_output; bool m_enable_compression; + bool m_write_hash_value = false; FilePosition m_blob_offset; // blob offset inside output stream }; @@ -531,7 +536,7 @@ class XmlSerializer : public ov::AttributeVisitor { int64_t offset = m_constant_write_handler.write(reinterpret_cast(header_ptr.get()), header_size, - &inter_size, + inter_size, m_compress_to_fp16, m_output_element_type); new_size += inter_size; @@ -554,7 +559,7 @@ class XmlSerializer : public ov::AttributeVisitor { m_constant_write_handler.write(raw_string_ptr, raw_string_size, - &inter_size, + inter_size, m_compress_to_fp16, 
m_output_element_type); new_size += inter_size; @@ -568,7 +573,7 @@ class XmlSerializer : public ov::AttributeVisitor { size_t new_size; int64_t offset = m_constant_write_handler.write(static_cast(a->get()->get_ptr()), size, - &new_size, + new_size, m_compress_to_fp16, m_output_element_type); @@ -1168,7 +1173,7 @@ void ngfunction_2_ir(pugi::xml_node& netXml, pugi::xml_node rt_info_node = netXml.append_child("rt_info"); for (const auto& it : model.get_rt_info()) { // Skip IR version - if (it.first == "version") + if (it.first == "version" || it.first == "__weights_path") continue; serialize_rt_info(rt_info_node, it.first, it.second); } @@ -1393,10 +1398,19 @@ bool pass::StreamSerialize::run_on_model(const std::shared_ptr& model /// -------- Hash calculation pass ------------- namespace { -template -static uint64_t hash_combine(uint64_t seed, const T& a) { - // Hash combine formula from boost - return seed ^ (std::hash()(a) + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +// Hash combine formula from boost for uint64_t. +inline uint64_t hash_combine(uint64_t h, uint64_t k) { + constexpr uint64_t m = 0xc6a4a7935bd1e995; + constexpr int r = 47; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + + return h + 0xe6546b64; } class OstreamHashWrapper final : public std::streambuf { @@ -1408,28 +1422,23 @@ class OstreamHashWrapper final : public std::streambuf { } std::streamsize xsputn(const char* s, std::streamsize n) override { - // Reinterpret data as uint32_t and accumulate in uint64_t to avoid overflow fluctuations in parallel_sum. 
- auto* int_sum = reinterpret_cast(s); - const uint64_t n32 = n / sizeof(uint32_t); - - m_res += parallel_sum(n32, uint64_t(0lu), [&](size_t k) -> uint32_t { - return int_sum[k]; - }); - - const uint64_t rest = n % sizeof(uint32_t); - for (uint64_t i = 0lu; i < rest; i++) { - m_res += s[n - rest + i]; - } + uint64_t h = ov::runtime::compute_hash(s, n); + m_res = hash_combine(m_res, h); return n; } }; } // namespace +std::streamsize OstreamHashWrapperBin::xsputn(const char* s, std::streamsize n) { + m_res = hash_combine(m_res, *reinterpret_cast(s)); + return n; +} + bool pass::Hash::run_on_model(const std::shared_ptr& model) { RUN_ON_MODEL_SCOPE(Hash); OstreamHashWrapper xmlHash; - OstreamHashWrapper binHash; + OstreamHashWrapperBin binHash; std::ostream xml(&xmlHash); std::ostream bin(&binHash); diff --git a/src/core/src/preprocess/preprocess_impls.cpp b/src/core/src/preprocess/preprocess_impls.cpp index 13a4c6f1353312..cbe18a78beb575 100644 --- a/src/core/src/preprocess/preprocess_impls.cpp +++ b/src/core/src/preprocess/preprocess_impls.cpp @@ -385,13 +385,6 @@ void OutputInfo::OutputInfoImpl::build(ov::ResultVector& results) { std::to_string(result->get_input_source_output(0).get_index())); } - OPENVINO_SUPPRESS_DEPRECATED_START - const auto tensor_name = ov::descriptor::get_ov_tensor_legacy_name(result->get_input_tensor(0)); - if (!tensor_name.empty()) { - ov::descriptor::set_ov_tensor_legacy_name(node.get_tensor(), tensor_name); - } - OPENVINO_SUPPRESS_DEPRECATED_END - // Reset friendly name of input node to avoid names collision // when there is at a new node inserted by post-processing steps // If no new nodes are inserted by post-processing, then we need to preserve friendly name of input diff --git a/src/core/src/runtime/compute_hash.cpp b/src/core/src/runtime/compute_hash.cpp new file mode 100644 index 00000000000000..c1a5a40c8638de --- /dev/null +++ b/src/core/src/runtime/compute_hash.cpp @@ -0,0 +1,918 @@ +// Copyright (C) 2018-2024 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +// The CRC computation is used for x86. +// The calculations were taken from the article +// "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction - Intel (December, 2009)". + +#include "openvino/runtime/compute_hash.hpp" + +#include +#include +#include + +#include "openvino/core/visibility.hpp" + +#if !defined(OS_CHROMEOS) && (defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)) +# define OV_CORE_USE_XBYAK_JIT +#endif + +#ifdef OV_CORE_USE_XBYAK_JIT +# include "openvino/core/parallel.hpp" +# include "openvino/reference/utils/registers_pool.hpp" +#endif // OV_CORE_USE_XBYAK_JIT + +namespace ov { +namespace runtime { + +#ifdef OV_CORE_USE_XBYAK_JIT + +using namespace ov::reference::jit; + +namespace jit { + +# define GET_OFF(field) offsetof(ComputeHashCallArgs, field) +# define getReg64() RegistersPool::Reg(m_registers_pool) +# define getVmm() RegistersPool::Reg(m_registers_pool) +# define getXmm() RegistersPool::Reg(m_registers_pool) + +enum KernelType { SINGLE_THREAD = 0, FIRST_THREAD, N_THREAD, FINAL_FOLD }; + +struct ComputeHashCompileParams { + KernelType type; +}; + +struct ComputeHashCallArgs { + const void* src_ptr = nullptr; + void* dst_ptr = nullptr; + const void* k_ptr = nullptr; + void* intermediate_ptr = nullptr; + uint64_t work_amount = 0lu; + uint64_t size = 0lu; + uint64_t threads_num = 1lu; +}; + +typedef void (*hash_kernel)(const ComputeHashCallArgs*); + +static const uint8_t SHUF_MASK[16] = {0b00001111, + 0b00001110, + 0b00001101, + 0b00001100, + 0b00001011, + 0b00001010, + 0b00001001, + 0b00001000, + 0b00000111, + 0b00000110, + 0b00000101, + 0b00000100, + 0b00000011, + 0b00000010, + 0b00000001, + 0b00000000}; + +constexpr uint64_t CRC_VAL = 0xffffffffffffffff; + +// POLYNOM(x) = 0x42F0E1EBA9EA3693 +constexpr uint64_t K_2 = 0x05f5c3c7eb52fab6; // x^(64*2) +constexpr uint64_t P_1 = 0x578d29d06cc4f872; // floor(x^128/P(x))-x^64 +constexpr uint64_t P_2 = 0x42f0e1eba9ea3693; // 
P(x)-x^64 +static const uint64_t K_PULL[] = { + K_2, // x^(64*2) + 0x4eb938a7d257740e, // x^(64*3) + 0x571bee0a227ef92b, // x^(64*4) + 0x44bef2a201b5200c, // x^(64*5) + 0x54819d8713758b2c, // x^(64*6) + 0x4a6b90073eb0af5a, // x^(64*7) + 0x5f6843ca540df020, // x^(64*8) + 0xddf4b6981205b83f, // x^(64*9) + 0x097c516e98bd2e73, // x^(64*10) + 0x0b76477b31e22e7b, // x^(64*11) + 0x9af04e1eff82d0dd, // x^(64*12) + 0x6e82e609297f8fe8, // x^(64*13) + 0xe464f4df5fb60ac1, // x^(64*14) + 0xb649c5b35a759cf2, // x^(64*15) + 0x05cf79dea9ac37d6, // x^(64*16) + 0x001067e571d7d5c2 // x^(64*17) +}; + +constexpr uint64_t K_2_3_OFF = 0lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_4_5_OFF = 1lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_6_7_OFF = 2lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_8_9_OFF = 3lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_10_11_OFF = 4lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_12_13_OFF = 5lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_14_15_OFF = 6lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_16_17_OFF = 7lu * 2lu * sizeof(uint64_t); + +class HashBase : public Generator { +protected: + void (*ker_fn)(const ComputeHashCallArgs*); + +public: + HashBase(cpu_isa_t isa) : Generator(isa) {} + + virtual void generate() = 0; + + void operator()(const ComputeHashCallArgs* args) { + ker_fn(args); + } + + virtual void create_kernel() { + generate(); + ker_fn = (decltype(ker_fn))getCode(); + OPENVINO_ASSERT(ker_fn, "[ CORE ] Could not generate kernel code."); + } +}; + +template +class ComputeHash : public HashBase { +public: + explicit ComputeHash(const ComputeHashCompileParams& jcp) : HashBase(isa), m_jcp(jcp) { + if (!mayiuse(cpu_isa_t::pclmulqdq)) { + OPENVINO_THROW( + "The current CPU does not support pclmulqdq instruction, which is required for the CRC algorithm."); + } + if (mayiuse(cpu_isa_t::vpclmulqdq)) { + is_vpclmulqdq = true; + } + } + + void generate() override { + m_registers_pool = RegistersPool::create(isa, {rax, rcx, 
rsp, rdi, k0}); + + r64_src_ptr = getReg64(); + r64_dst_ptr = getReg64(); + r64_work_amount = getReg64(); + r64_k_ptr = getReg64(); + r64_aux = getReg64(); + v_k_2_3 = getVmm(); + v_shuf_mask = getVmm(); + auto v_dst = getVmm(); + + this->preamble(); + + initialize(v_dst); + bulk_fold(v_dst); + join(v_dst); + fold_to_128(v_dst); + fold_to_64(v_dst); + + this->postamble(); + m_registers_pool.reset(); + } + + static std::shared_ptr create(const ComputeHashCompileParams& params) { + auto kernel = std::make_shared(params); + OPENVINO_ASSERT(kernel, "[ CORE ] Could not create ComputeHash kernel."); + kernel->create_kernel(); + + return kernel; + } + +private: + using Vmm = typename std::conditional::type; + bool is_vpclmulqdq = false; + + ComputeHashCompileParams m_jcp; + RegistersPool::Ptr m_registers_pool; + + const Xbyak::Reg64 r64_params = abi_param1; + + RegistersPool::Reg r64_src_ptr; + RegistersPool::Reg r64_dst_ptr; + RegistersPool::Reg r64_work_amount; + RegistersPool::Reg r64_k_ptr; + RegistersPool::Reg r64_aux; + + // Vector registers + RegistersPool::Reg v_k_2_3; + RegistersPool::Reg v_shuf_mask; + + void initialize(const Vmm& v_dst); + + void bulk_fold(const Vmm& v_dst); + + void join(const Vmm& v_dst); + + void fold_to_128(const Vmm& v_dst); + + void fold_to_64(const Vmm& v_dst); + + void uni_vpxorq(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src_0, const Xbyak::Xmm& v_src_1); + + void uni_vmovdqu64(const Xbyak::Xmm& v_dst, const Xbyak::Operand& v_src_0); + + void uni_vmovdqu64(const Xbyak::Address& v_dst, const Xbyak::Xmm& v_src_0); + + void uni_vbroadcasti64x2(const Xbyak::Ymm& v_dst, const Xbyak::Address& v_src_0); + + void partial_load(const Xbyak::Xmm& xmm_dst, const Xbyak::Address& src_addr, const Xbyak::Reg64& r64_load_num); + + void partial_load(const Xbyak::Ymm& ymm_dst, const Xbyak::Address& src_addr, const Xbyak::Reg64& r64_load_num); +}; + +template <> +void ComputeHash::uni_vpxorq(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src_0, + const 
Xbyak::Xmm& v_src_1) { + vpxorq(v_dst, v_src_0, v_src_1); +} +template +void ComputeHash::uni_vpxorq(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src_0, const Xbyak::Xmm& v_src_1) { + vpxor(v_dst, v_src_0, v_src_1); +} +template <> +void ComputeHash::uni_vmovdqu64(const Xbyak::Xmm& v_dst, const Xbyak::Operand& v_src_0) { + vmovdqu64(v_dst, v_src_0); +} +template +void ComputeHash::uni_vmovdqu64(const Xbyak::Xmm& v_dst, const Xbyak::Operand& v_src_0) { + vmovdqu(v_dst, v_src_0); +} +template <> +void ComputeHash::uni_vmovdqu64(const Xbyak::Address& v_dst, const Xbyak::Xmm& v_src_0) { + vmovdqu64(v_dst, v_src_0); +} +template +void ComputeHash::uni_vmovdqu64(const Xbyak::Address& v_dst, const Xbyak::Xmm& v_src_0) { + vmovdqu(v_dst, v_src_0); +} +template <> +void ComputeHash::uni_vbroadcasti64x2(const Xbyak::Ymm& v_dst, const Xbyak::Address& v_src_0) { + vbroadcasti64x2(v_dst, v_src_0); +} +template +void ComputeHash::uni_vbroadcasti64x2(const Xbyak::Ymm& v_dst, const Xbyak::Address& v_src_0) { + vbroadcasti128(v_dst, v_src_0); +} +template <> +void ComputeHash::partial_load(const Xbyak::Xmm& xmm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + Xbyak::Label l_mv_mask; + auto rOnes = getReg64(); + auto k_load_mask = RegistersPool::Reg(m_registers_pool); + + mov(rOnes, 0xFFFFFFFFFFFFFFFF); + cmp(r64_load_num, 0x3f); + jg(l_mv_mask); + + shlx(rOnes, rOnes, r64_load_num); + not_(rOnes); + + L(l_mv_mask); + kmovq(k_load_mask, rOnes); + + vmovdqu8(Vmm(xmm_dst.getIdx()) | k_load_mask | T_z, ptr[r64_src_ptr]); +} +template +void ComputeHash::partial_load(const Xbyak::Xmm& xmm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + Xbyak::Label l_partial, l_end; + + cmp(r64_load_num, xmm_len); + jl(l_partial, T_NEAR); + uni_vmovdqu64(xmm_dst, ptr[src_addr.getRegExp()]); + jmp(l_end, T_NEAR); + + L(l_partial); + { + uni_vpxorq(xmm_dst, xmm_dst, xmm_dst); + for (size_t j = 0lu; j < xmm_len - 1; j++) { + cmp(r64_load_num, 
static_cast(j)); + jle(l_end, T_NEAR); + pinsrb(xmm_dst, ptr[src_addr.getRegExp() + j], static_cast(j)); + } + } + + L(l_end); +} +template <> +void ComputeHash::partial_load(const Xbyak::Ymm& xmm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + partial_load(Xbyak::Xmm(xmm_dst.getIdx()), src_addr, r64_load_num); +} +template +void ComputeHash::partial_load(const Xbyak::Ymm& ymm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + Xbyak::Label l_xmm, l_partial, l_end; + auto xmm_dst = Xbyak::Xmm(ymm_dst.getIdx()); + + cmp(r64_load_num, ymm_len); + jl(l_xmm, T_NEAR); + uni_vmovdqu64(ymm_dst, ptr[src_addr.getRegExp()]); + jmp(l_end, T_NEAR); + + L(l_xmm); + uni_vpxorq(ymm_dst, ymm_dst, ymm_dst); + cmp(r64_load_num, xmm_len); + jl(l_partial, T_NEAR); + uni_vmovdqu64(xmm_dst, ptr[src_addr.getRegExp()]); + je(l_end, T_NEAR); + + { + Xbyak::Label l_rest_loop, l_perm; + + vperm2i128(ymm_dst, ymm_dst, ymm_dst, 0x1); + for (size_t j = 0lu; j < xmm_len - 1lu; j++) { + cmp(r64_load_num, static_cast(xmm_len + j)); + jle(l_perm, T_NEAR); + pinsrb(xmm_dst, ptr[src_addr.getRegExp() + xmm_len + j], static_cast(j)); + } + L(l_perm); + vperm2i128(ymm_dst, ymm_dst, ymm_dst, 0x1); + } + jmp(l_end, T_NEAR); + + L(l_partial); + { + for (size_t j = 0lu; j < xmm_len - 1; j++) { + cmp(r64_load_num, static_cast(j)); + jle(l_end, T_NEAR); + pinsrb(xmm_dst, ptr[src_addr.getRegExp() + j], static_cast(j)); + } + } + + L(l_end); +} + +template +void ComputeHash::initialize(const Vmm& v_dst) { + mov(r64_src_ptr, ptr[r64_params + GET_OFF(src_ptr)]); + mov(r64_dst_ptr, ptr[r64_params + GET_OFF(dst_ptr)]); + mov(r64_k_ptr, ptr[r64_params + GET_OFF(k_ptr)]); + mov(r64_work_amount, ptr[r64_params + GET_OFF(work_amount)]); + + uni_vbroadcasti64x2(v_k_2_3, ptr[r64_k_ptr + K_2_3_OFF]); + + mov(r64_aux, reinterpret_cast(SHUF_MASK)); + uni_vbroadcasti64x2(v_shuf_mask, ptr[r64_aux]); + + if (m_jcp.type == SINGLE_THREAD || m_jcp.type == FIRST_THREAD) { + 
auto xmm_dst = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_aux = getXmm(); + + // Initial CRC + mov(r64_aux, ptr[r64_params + GET_OFF(size)]); + vpinsrq(xmm_aux, xmm_aux, r64_aux, 0x0); + mov(r64_aux, CRC_VAL); + vpinsrq(xmm_aux, xmm_aux, r64_aux, 0x1); + + // First xor with source. + partial_load(v_dst, ptr[r64_src_ptr], r64_work_amount); + vpshufb(v_dst, v_dst, v_shuf_mask); + pxor(xmm_dst, xmm_aux); // The SSE version is used to avoid zeroing out the rest of the Vmm. + if (m_jcp.type == SINGLE_THREAD) { + add(r64_src_ptr, xmm_len); + } + } else if (m_jcp.type == N_THREAD) { + uni_vmovdqu64(v_dst, ptr[r64_src_ptr]); + vpshufb(v_dst, v_dst, v_shuf_mask); + } + if (m_jcp.type == SINGLE_THREAD || m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + sub(r64_work_amount, xmm_len); + } +} + +template <> +void ComputeHash::bulk_fold(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FIRST_THREAD && m_jcp.type != N_THREAD) { + return; + } + Xbyak::Label l_fold_loop, l_end; + cmp(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + jl(l_end, T_NEAR); + + auto v_src_0 = getVmm(); + auto v_dst_0 = getVmm(); + auto v_dst_1 = getVmm(); + auto v_dst_2 = getVmm(); + auto& v_dst_3 = v_dst; + auto v_k_loop = getVmm(); + auto v_aux_0 = getVmm(); + + auto xmm_src_0 = Xbyak::Xmm(v_src_0.getIdx()); + auto xmm_src_1 = getXmm(); + auto xmm_dst_0 = Xbyak::Xmm(v_dst_0.getIdx()); + auto xmm_dst_1 = Xbyak::Xmm(v_dst_1.getIdx()); + auto xmm_dst_2 = Xbyak::Xmm(v_dst_2.getIdx()); + auto xmm_dst_3 = Xbyak::Xmm(v_dst_3.getIdx()); + auto xmm_k_loop = Xbyak::Xmm(v_k_loop.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_aux_0 = Xbyak::Xmm(v_aux_0.getIdx()); + + RegistersPool::Reg r64_bulk_step; + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + r64_bulk_step = getReg64(); + mov(r64_bulk_step, ptr[r64_params + GET_OFF(threads_num)]); + sal(r64_bulk_step, static_cast(std::log2(get_vlen()))); // * vlen + } + + if (m_jcp.type == 
SINGLE_THREAD) { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_8_9_OFF]); + } else { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_16_17_OFF]); + } + + uni_vmovdqu64(v_dst_0, v_dst); + + if (!is_vpclmulqdq) { + vextracti64x2(xmm_dst_1, v_dst_0, 0x1); + vextracti64x2(xmm_dst_2, v_dst_0, 0x2); + vextracti64x2(xmm_dst_3, v_dst_0, 0x3); + } + + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + add(r64_src_ptr, r64_bulk_step); + prefetcht2(ptr[r64_src_ptr + 16384]); + } else { + add(r64_src_ptr, static_cast(get_vlen() - xmm_len)); + prefetcht2(ptr[r64_src_ptr + 4096]); + } + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + sub(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + + L(l_fold_loop); + { + uni_vmovdqu64(v_src_0, ptr[r64_src_ptr]); + vpshufb(v_src_0, v_src_0, v_shuf_mask); + + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + add(r64_src_ptr, r64_bulk_step); + prefetcht2(ptr[r64_src_ptr + 16384]); + } else { + add(r64_src_ptr, static_cast(get_vlen())); + prefetcht2(ptr[r64_src_ptr + 4096]); + } + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + if (is_vpclmulqdq) { + vpclmulqdq(v_aux_0, v_dst_0, v_k_loop, 0b00000000); + vpclmulqdq(v_dst_0, v_dst_0, v_k_loop, 0b00010001); + uni_vpxorq(v_aux_0, v_aux_0, v_src_0); + uni_vpxorq(v_dst_0, v_dst_0, v_aux_0); + } else { + // 0 + vpclmulqdq(xmm_aux_0, xmm_dst_0, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_0); + uni_vpxorq(xmm_dst_0, xmm_dst_0, xmm_aux_0); + + // 1 + vextracti64x2(xmm_src_1, v_src_0, 0x1); + vpclmulqdq(xmm_aux_0, xmm_dst_1, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_1, xmm_dst_1, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_aux_0); + + // 2 + vextracti64x2(xmm_src_1, v_src_0, 0x2); + vpclmulqdq(xmm_aux_0, xmm_dst_2, xmm_k_loop, 0b00000000); + 
vpclmulqdq(xmm_dst_2, xmm_dst_2, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_2, xmm_dst_2, xmm_aux_0); + + // 3 + vextracti64x2(xmm_src_1, v_src_0, 0x3); + vpclmulqdq(xmm_aux_0, xmm_dst_3, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_3, xmm_dst_3, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + } + + sub(r64_work_amount, static_cast(get_vlen())); + jge(l_fold_loop, T_NEAR); + } + add(r64_work_amount, static_cast(get_vlen())); + + if (m_jcp.type == SINGLE_THREAD) { + if (is_vpclmulqdq) { + vextracti64x2(xmm_dst_1, v_dst_0, 0x1); + vextracti64x2(xmm_dst_2, v_dst_0, 0x2); + vextracti64x2(xmm_dst_3, v_dst_0, 0x3); + } + + vpclmulqdq(xmm_aux_0, xmm_dst_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00010001); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_dst_0); + + vpclmulqdq(xmm_aux_0, xmm_dst_1, ptr[r64_k_ptr + K_4_5_OFF], 0b00000000); + vpclmulqdq(xmm_dst_1, xmm_dst_1, ptr[r64_k_ptr + K_4_5_OFF], 0b00010001); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_dst_1); + + vpclmulqdq(xmm_aux_0, xmm_dst_2, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst_2, xmm_dst_2, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_dst_2); + } else { + if (is_vpclmulqdq) { + uni_vmovdqu64(ptr[r64_dst_ptr], v_dst_0); + } else { + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 0lu], xmm_dst_0); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 1lu], xmm_dst_1); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 2lu], xmm_dst_2); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 3lu], xmm_dst_3); + } + } + + L(l_end); +} + +template +void ComputeHash::bulk_fold(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FIRST_THREAD && m_jcp.type != N_THREAD) { + return; + } + Xbyak::Label 
l_fold_loop, l_end; + cmp(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + jl(l_end, T_NEAR); + + auto v_src_0 = getVmm(); + auto v_dst_0 = getVmm(); + auto& v_dst_1 = v_dst; + auto v_aux_0 = getVmm(); + auto v_k_loop = getVmm(); + + auto xmm_src_0 = Xbyak::Xmm(v_src_0.getIdx()); + auto xmm_src_1 = getXmm(); + auto xmm_dst_0 = Xbyak::Xmm(v_dst_0.getIdx()); + auto xmm_dst_1 = Xbyak::Xmm(v_dst_1.getIdx()); + auto xmm_k_loop = Xbyak::Xmm(v_k_loop.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_aux_0 = Xbyak::Xmm(v_aux_0.getIdx()); + + RegistersPool::Reg r64_bulk_step; + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + r64_bulk_step = getReg64(); + mov(r64_bulk_step, ptr[r64_params + GET_OFF(threads_num)]); + sal(r64_bulk_step, static_cast(std::log2(get_vlen()))); // * vlen + } + + if (m_jcp.type == SINGLE_THREAD) { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_4_5_OFF]); + } else { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_8_9_OFF]); + } + + uni_vmovdqu64(v_dst_0, v_dst); + + if (!is_vpclmulqdq) { + vextracti128(xmm_dst_1, v_dst_0, 0x1); + } + + if (m_jcp.type == SINGLE_THREAD) { + add(r64_src_ptr, static_cast(get_vlen() - xmm_len)); + } else { + add(r64_src_ptr, r64_bulk_step); + } + prefetcht2(ptr[r64_src_ptr + 4096]); + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + sub(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + + L(l_fold_loop); + { + uni_vmovdqu64(v_src_0, ptr[r64_src_ptr]); + vpshufb(v_src_0, v_src_0, v_shuf_mask); + + if (m_jcp.type == SINGLE_THREAD) { + add(r64_src_ptr, static_cast(get_vlen())); + } else { + add(r64_src_ptr, r64_bulk_step); + } + prefetcht2(ptr[r64_src_ptr + 4096]); + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + if (is_vpclmulqdq) { + vpclmulqdq(v_aux_0, v_dst_0, v_k_loop, 0b00000000); + vpclmulqdq(v_dst_0, v_dst_0, v_k_loop, 0b00010001); + uni_vpxorq(v_aux_0, v_aux_0, v_src_0); + uni_vpxorq(v_dst_0, 
v_dst_0, v_aux_0); + } else { + // 0 + vpclmulqdq(xmm_aux_0, xmm_dst_0, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_0); + uni_vpxorq(xmm_dst_0, xmm_dst_0, xmm_aux_0); + // 1 + vextracti128(xmm_src_1, v_src_0, 0x1); + vpclmulqdq(xmm_aux_0, xmm_dst_1, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_1, xmm_dst_1, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_aux_0); + } + + sub(r64_work_amount, static_cast(get_vlen())); + jge(l_fold_loop, T_NEAR); + } + add(r64_work_amount, static_cast(get_vlen())); + + if (m_jcp.type == SINGLE_THREAD) { + if (is_vpclmulqdq) { + vextracti128(xmm_dst_1, v_dst_0, 0x1); + } + vpclmulqdq(xmm_aux_0, xmm_dst_0, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_aux_0); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_dst_0); + } else { + if (is_vpclmulqdq) { + uni_vmovdqu64(ptr[r64_dst_ptr], v_dst_0); + } else { + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 0lu], xmm_dst_0); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 1lu], xmm_dst_1); + } + } + + L(l_end); +} + +template <> +void ComputeHash::join(const Vmm& v_dst) { + if (m_jcp.type != FINAL_FOLD) { + return; + } + + mov(r64_aux, ptr[r64_params + GET_OFF(intermediate_ptr)]); + prefetcht0(ptr[r64_aux + 1024]); + + auto xmm_src_0 = getXmm(); + auto xmm_src_last = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_aux_0 = getXmm(); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + + uni_vmovdqu64(xmm_src_last, ptr[r64_aux + xmm_len * 7]); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_14_15_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_14_15_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len]); + 
vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_12_13_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_12_13_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 2lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_10_11_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_10_11_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 3lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_8_9_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_8_9_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 4lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 5lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 6lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); +} + +template +void ComputeHash::join(const Vmm& v_dst) { + if (m_jcp.type != FINAL_FOLD) { + return; + } + + mov(r64_aux, ptr[r64_params + GET_OFF(intermediate_ptr)]); + 
prefetcht0(ptr[r64_aux + 1024]); + + auto xmm_src_0 = getXmm(); + auto xmm_src_last = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_aux_0 = getXmm(); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + + uni_vmovdqu64(xmm_src_last, ptr[r64_aux + xmm_len * 3]); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 0lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 1lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 2lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); +} + +template +void ComputeHash::fold_to_128(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FINAL_FOLD) { + return; + } + Xbyak::Label l_fold_loop, l_end; + cmp(r64_work_amount, xmm_len); + jl(l_end, T_NEAR); + + auto xmm_src = getXmm(); + auto xmm_dst = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_shuf_mask = Xbyak::Xmm(v_shuf_mask.getIdx()); + auto xmm_aux = getXmm(); + + L(l_fold_loop); + { + uni_vmovdqu64(xmm_src, ptr[r64_src_ptr]); + vpshufb(xmm_src, xmm_src, xmm_shuf_mask); + + vpclmulqdq(xmm_aux, xmm_dst, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst, xmm_dst, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + uni_vpxorq(xmm_dst, xmm_dst, xmm_src); + + add(r64_src_ptr, xmm_len); + sub(r64_work_amount, xmm_len); + 
cmp(r64_work_amount, xmm_len); + jge(l_fold_loop, T_NEAR); + } + + L(l_end); +} + +template +void ComputeHash::fold_to_64(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FINAL_FOLD) { + return; + } + Xbyak::Label l_fold_to_64; + cmp(r64_work_amount, 0); + jle(l_fold_to_64, T_NEAR); + + auto xmm_src = getXmm(); + auto xmm_dst = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_shuf_mask = Xbyak::Xmm(v_shuf_mask.getIdx()); + auto xmm_aux = getXmm(); + auto xmm_aux_1 = getXmm(); + auto xmm_aux_2 = getXmm(); + + partial_load(xmm_src, ptr[r64_src_ptr], r64_work_amount); + vpshufb(xmm_src, xmm_src, xmm_shuf_mask); + + vpclmulqdq(xmm_aux, xmm_dst, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst, xmm_dst, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_aux, xmm_aux, xmm_src); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + + L(l_fold_to_64); + + mov(r64_aux, K_2); + vpinsrq(xmm_aux, xmm_aux, r64_aux, 0x0); + vpclmulqdq(xmm_aux, xmm_dst, xmm_aux, 0b00000001); + vpslldq(xmm_dst, xmm_dst, 0x8); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + + mov(r64_aux, P_1); + vpinsrq(xmm_aux_2, xmm_aux_2, r64_aux, 0x0); + vpclmulqdq(xmm_aux, xmm_dst, xmm_aux_2, 0b00000001); + mov(r64_aux, 0x0); + vpinsrq(xmm_aux_1, xmm_dst, r64_aux, 0x0); + uni_vpxorq(xmm_aux, xmm_aux, xmm_aux_1); + vpinsrq(xmm_aux_1, xmm_aux, r64_aux, 0x0); + + mov(r64_aux, P_2); + vpinsrq(xmm_aux_2, xmm_aux_2, r64_aux, 0x1); + vpclmulqdq(xmm_aux, xmm_aux, xmm_aux_2, 0b00010001); + uni_vpxorq(xmm_aux, xmm_aux, xmm_aux_1); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + + vpextrq(ptr[r64_dst_ptr], xmm_dst, 0x0); +} + +} // namespace jit +#endif // OV_CORE_USE_XBYAK_JIT + +size_t compute_hash(const void* src, size_t size) { +#ifdef OV_CORE_USE_XBYAK_JIT + if (Generator::mayiuse(avx2)) { + uint64_t result = 0lu; + + // Parallel section + constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17 + const uint64_t size_u64 = static_cast(size); + if (size_u64 >= min_wa_per_thread * 2lu) { + 
static auto first_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::FIRST_THREAD}) + : jit::ComputeHash::create({jit::FIRST_THREAD}); + static auto n_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::N_THREAD}) + : jit::ComputeHash::create({jit::N_THREAD}); + static auto final_fold_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::FINAL_FOLD}) + : jit::ComputeHash::create({jit::FINAL_FOLD}); + + static const uint64_t max_thr_num = 2lu; + uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num); + const uint64_t el_per_thread = + first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen()); + std::vector intermediate(thr_num * first_thr_kernel->get_vlen()); + + parallel_nt_static(static_cast(thr_num), [&](const int ithr, const int nthr) { + uint64_t start = el_per_thread * ithr; + if (start >= size_u64) { + return; + } + uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread; + + jit::ComputeHashCallArgs args; + + args.src_ptr = reinterpret_cast(src) + first_thr_kernel->get_vlen() * ithr; + args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]); + args.k_ptr = jit::K_PULL; + args.work_amount = work_amount; + args.size = size_u64; + args.threads_num = thr_num; + + if (ithr == 0) { + (*first_thr_kernel)(&args); + } else { + (*n_thr_kernel)(&args); + } + }); + + jit::ComputeHashCallArgs args; + args.work_amount = size_u64 - el_per_thread * thr_num; + args.src_ptr = reinterpret_cast(src) + size_u64 - args.work_amount; + args.dst_ptr = &result; + args.k_ptr = jit::K_PULL; + args.size = size_u64; + args.intermediate_ptr = intermediate.data(); + + (*final_fold_kernel)(&args); + } else { + static auto single_thr_kernel = Generator::mayiuse(avx512_core) + ? 
jit::ComputeHash::create({jit::SINGLE_THREAD}) + : jit::ComputeHash::create({jit::SINGLE_THREAD}); + + jit::ComputeHashCallArgs args; + args.src_ptr = src; + args.dst_ptr = &result; + args.k_ptr = jit::K_PULL; + args.work_amount = size_u64; + args.size = size_u64; + + (*single_thr_kernel)(&args); + } + + return result; + } + +#endif // OV_CORE_USE_XBYAK_JIT + + constexpr auto cel_size = sizeof(size_t); + size_t seed = size; + const auto data = static_cast(src); + const auto d_end = std::next(data, size / cel_size); + // The constant value used as a magic number has been + // traditionally used e.g. in boost library's hash_combine. + // It happens to be derived from the golden ratio. + for (auto d = data; d != d_end; ++d) { + seed ^= *d + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + size_t last_bytes{0}; + std::memcpy(&last_bytes, d_end, size % cel_size); + seed ^= last_bytes + 0x9e3779b9 + (seed << 6) + (seed >> 2); + + return seed; +} + +} // namespace runtime +} // namespace ov diff --git a/src/core/src/type/element_type.cpp b/src/core/src/type/element_type.cpp index c0fad90285e3f6..3e664335033d80 100644 --- a/src/core/src/type/element_type.cpp +++ b/src/core/src/type/element_type.cpp @@ -164,51 +164,6 @@ std::vector ov::element::Type::get_known_types() { return rc; } -ov::element::Type::Type(size_t bitwidth, - bool is_real, - bool is_signed, - bool is_quantized, - const std::string& /* cname */) { - const ElementTypes::ElementsMap elements_map{ - {ov::element::Type_t::undefined, - {std::numeric_limits::max(), false, false, false, "undefined", "undefined"}}, - {ov::element::Type_t::dynamic, {0, false, false, false, "dynamic", "dynamic"}}, - {ov::element::Type_t::boolean, {8, false, true, false, "char", "boolean"}}, - {ov::element::Type_t::bf16, {16, true, true, false, "bfloat16", "bf16"}}, - {ov::element::Type_t::f16, {16, true, true, false, "float16", "f16"}}, - {ov::element::Type_t::f32, {32, true, true, false, "float", "f32"}}, - {ov::element::Type_t::f64, 
{64, true, true, false, "double", "f64"}}, - {ov::element::Type_t::i4, {4, false, true, true, "int4_t", "i4"}}, - {ov::element::Type_t::i8, {8, false, true, true, "int8_t", "i8"}}, - {ov::element::Type_t::i16, {16, false, true, false, "int16_t", "i16"}}, - {ov::element::Type_t::i32, {32, false, true, true, "int32_t", "i32"}}, - {ov::element::Type_t::i64, {64, false, true, false, "int64_t", "i64"}}, - {ov::element::Type_t::u1, {1, false, false, false, "uint1_t", "u1"}}, - {ov::element::Type_t::u2, {2, false, false, false, "uint2_t", "u2"}}, - {ov::element::Type_t::u3, {3, false, false, false, "uint3_t", "u3"}}, - {ov::element::Type_t::u4, {4, false, false, false, "uint4_t", "u4"}}, - {ov::element::Type_t::u6, {6, false, false, false, "uint6_t", "u6"}}, - {ov::element::Type_t::u8, {8, false, false, true, "uint8_t", "u8"}}, - {ov::element::Type_t::u16, {16, false, false, false, "uint16_t", "u16"}}, - {ov::element::Type_t::u32, {32, false, false, false, "uint32_t", "u32"}}, - {ov::element::Type_t::u64, {64, false, false, false, "uint64_t", "u64"}}, - {ov::element::Type_t::nf4, {4, false, false, true, "nfloat4", "nf4"}}, - {ov::element::Type_t::f8e4m3, {8, true, true, true, "f8e4m3", "f8e4m3"}}, - {ov::element::Type_t::f8e5m2, {8, true, true, true, "f8e5m2", "f8e5m2"}}, - {ov::element::Type_t::string, {8 * sizeof(std::string), false, false, false, "string", "string"}}, - {ov::element::Type_t::f4e2m1, {4, true, true, true, "f4e2m1", "f4e2m1"}}, - {ov::element::Type_t::f8e8m0, {4, true, true, true, "f8e8m0", "f8e8m0"}}, - }; - for (const auto& t : elements_map) { - const TypeInfo& info = t.second; - if (bitwidth == info.m_bitwidth && is_real == info.m_is_real && is_signed == info.m_is_signed && - is_quantized == info.m_is_quantized) { - m_type = t.first; - return; - } - } -} - ov::element::Type::Type(const std::string& type) : Type(type_from_string(type)) {} std::string ov::element::Type::c_type_string() const { diff --git a/src/core/tests/CMakeLists.txt 
b/src/core/tests/CMakeLists.txt index c3ed58783ac946..89acd7bd1809d0 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -18,6 +18,7 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/threading.cpp if(SUGGEST_OVERRIDE_SUPPORTED) set_source_files_properties(ov_tensor_test.cpp type_prop/multiclass_nms.cpp + type_prop/squeeze.cpp PROPERTIES COMPILE_OPTIONS -Wno-suggest-override) endif() diff --git a/src/core/tests/op.cpp b/src/core/tests/op.cpp index cf65eecc16cd4c..08fa7a97584e41 100644 --- a/src/core/tests/op.cpp +++ b/src/core/tests/op.cpp @@ -67,4 +67,5 @@ TEST(op, opset_multi_thread) { doTest(ov::get_opset13); doTest(ov::get_opset14); doTest(ov::get_opset15); + doTest(ov::get_opset16); } diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index 2df8bade6a2f2c..81f6e80c28189f 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -14,6 +14,7 @@ #include "openvino/opsets/opset13.hpp" #include "openvino/opsets/opset14.hpp" #include "openvino/opsets/opset15.hpp" +#include "openvino/opsets/opset16.hpp" #include "openvino/opsets/opset2.hpp" #include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset4.hpp" @@ -61,9 +62,9 @@ TEST_P(OpsetTests, opset_dump) { INSTANTIATE_TEST_SUITE_P(opset, OpsetTests, - testing::Values(OpsetTestParams{ov::get_opset1, 110}, - OpsetTestParams{ov::get_opset2, 112}, - OpsetTestParams{ov::get_opset3, 127}, + testing::Values(OpsetTestParams{ov::get_opset1, 109}, + OpsetTestParams{ov::get_opset2, 111}, + OpsetTestParams{ov::get_opset3, 126}, OpsetTestParams{ov::get_opset4, 137}, OpsetTestParams{ov::get_opset5, 145}, OpsetTestParams{ov::get_opset6, 152}, @@ -75,7 +76,8 @@ INSTANTIATE_TEST_SUITE_P(opset, OpsetTestParams{ov::get_opset12, 178}, OpsetTestParams{ov::get_opset13, 186}, OpsetTestParams{ov::get_opset14, 188}, - OpsetTestParams{ov::get_opset15, 15}), + OpsetTestParams{ov::get_opset15, 199}, + OpsetTestParams{ov::get_opset16, 4}), OpsetTestNameGenerator{}); class 
MyOpOld : public ov::op::Op { diff --git a/src/core/tests/pass/serialization/deterministicity.cpp b/src/core/tests/pass/serialization/deterministicity.cpp index 5bcfbf97b77890..8441da501eb9bf 100644 --- a/src/core/tests/pass/serialization/deterministicity.cpp +++ b/src/core/tests/pass/serialization/deterministicity.cpp @@ -193,6 +193,7 @@ TEST_P(SerializationDeterministicityInputOutputTest, FromOvModel) { auto& expected1 = modelRef; ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1, irVersion).run_on_model(modelRef); auto expected2 = ov::test::readModel(m_out_xml_path_1, m_out_bin_path_1); + ov::pass::Serialize(m_out_xml_path_2, m_out_bin_path_2, irVersion).run_on_model(expected2); EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name()); diff --git a/src/core/tests/type_prop/identity.cpp b/src/core/tests/type_prop/identity.cpp new file mode 100644 index 00000000000000..fee04f33ab60f1 --- /dev/null +++ b/src/core/tests/type_prop/identity.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/identity.hpp" + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/type_prop.hpp" +#include "openvino/op/constant.hpp" + +using namespace testing; + +namespace ov { +namespace test { + +class TypePropIdentityV16Test : public TypePropOpTest {}; + +TEST_F(TypePropIdentityV16Test, default_ctor) { + const auto data = op::v0::Constant::create(element::f64, Shape{2, 2}, {1.0f, 1.0f, 1.0f, 1.0f}); + const auto op = make_op(); + op->set_arguments(OutputVector{data}); + op->validate_and_infer_types(); + + EXPECT_EQ(op->get_input_size(), 1); + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_output_element_type(0), element::f64); + EXPECT_EQ(op->get_output_partial_shape(0), PartialShape({2, 2})); +} + +TEST_F(TypePropIdentityV16Test, input_data_ctor) { + const auto data = op::v0::Constant::create(element::i64, Shape{1, 2}, {1.0f, 1.0f}); 
+ const auto op = make_op(data); + + EXPECT_EQ(op->get_input_size(), 1); + EXPECT_EQ(op->get_output_size(), 1); + EXPECT_EQ(op->get_output_element_type(0), element::i64); + EXPECT_EQ(op->get_output_partial_shape(0), PartialShape({1, 2})); +} +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/core/tests/type_prop/lstm_sequence.cpp b/src/core/tests/type_prop/lstm_sequence.cpp index 698206e25a5562..80da26340fd144 100644 --- a/src/core/tests/type_prop/lstm_sequence.cpp +++ b/src/core/tests/type_prop/lstm_sequence.cpp @@ -89,42 +89,6 @@ shared_ptr lstm_seq_direction_initialization(const recurre return lstm_sequence; } - -OPENVINO_SUPPRESS_DEPRECATED_START -shared_ptr lstm_seq_v0_tensor_initialization(const recurrent_sequence_parameters& param) { - auto batch_size = param.batch_size; - auto seq_length = param.seq_length; - auto input_size = param.input_size; - auto num_directions = param.num_directions; - auto hidden_size = param.hidden_size; - auto et = param.et; - - const auto X = make_shared(et, PartialShape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(et, PartialShape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(et, PartialShape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(et, PartialShape{batch_size}); - const auto W = make_shared(et, PartialShape{num_directions, hidden_size * 4, input_size}); - const auto R = make_shared(et, PartialShape{num_directions, hidden_size * 4, hidden_size}); - const auto B = make_shared(et, PartialShape{num_directions, hidden_size * 4}); - const auto P = make_shared(et, PartialShape{num_directions, hidden_size * 3}); - - const auto lstm_sequence = make_shared(); - - lstm_sequence->set_argument(0, X); - lstm_sequence->set_argument(1, initial_hidden_state); - lstm_sequence->set_argument(2, initial_cell_state); - lstm_sequence->set_argument(3, sequence_lengths); - 
lstm_sequence->set_argument(4, W); - lstm_sequence->set_argument(5, R); - lstm_sequence->set_argument(6, B); - lstm_sequence->set_argument(7, P); - - return lstm_sequence; -} -OPENVINO_SUPPRESS_DEPRECATED_END - } // namespace TEST(type_prop, lstm_sequence_forward) { @@ -173,56 +137,6 @@ TEST(type_prop, lstm_sequence_forward) { EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } -TEST(type_prop, lstm_sequence_v0_forward) { - const size_t batch_size = 8; - const size_t num_directions = 1; - const size_t seq_length = 6; - const size_t input_size = 4; - const size_t hidden_size = 128; - - const auto X = make_shared(element::f32, Shape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); - const auto W = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, input_size}); - const auto R = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, hidden_size}); - const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); - const auto P = make_shared(element::f32, Shape{num_directions, 3 * hidden_size}); - - const auto lstm_direction = op::RecurrentSequenceDirection::FORWARD; - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto lstm_sequence = make_shared(X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - P, - hidden_size, - lstm_direction); - - EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); - EXPECT_EQ(lstm_sequence->get_direction(), op::RecurrentSequenceDirection::FORWARD); - EXPECT_TRUE(lstm_sequence->get_activations_alpha().empty()); - EXPECT_TRUE(lstm_sequence->get_activations_beta().empty()); - EXPECT_EQ(lstm_sequence->get_activations()[0], "sigmoid"); - 
EXPECT_EQ(lstm_sequence->get_activations()[1], "tanh"); - EXPECT_EQ(lstm_sequence->get_activations()[2], "tanh"); - EXPECT_EQ(lstm_sequence->get_clip_threshold(), 0.f); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); - EXPECT_EQ(lstm_sequence->outputs().size(), 3); - EXPECT_EQ(lstm_sequence->get_output_shape(0), (Shape{batch_size, num_directions, seq_length, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); - OPENVINO_SUPPRESS_DEPRECATED_END -} - TEST(type_prop, lstm_sequence_bidirectional) { const size_t batch_size = 24; const size_t num_directions = 2; @@ -273,68 +187,6 @@ TEST(type_prop, lstm_sequence_bidirectional) { EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } -TEST(type_prop, lstm_sequence_v0_bidirectional) { - const size_t batch_size = 24; - const size_t num_directions = 2; - const size_t seq_length = 12; - const size_t input_size = 8; - const size_t hidden_size = 256; - const bool input_forget = true; - const ov::op::LSTMWeightsFormat weights_format = ov::op::LSTMWeightsFormat::FICO; - const float clip_threshold = 3.5f; - - const auto X = make_shared(element::f32, Shape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); - const auto W = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, input_size}); - const auto R = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, 
hidden_size}); - const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); - const auto P = make_shared(element::f32, Shape{num_directions, 3 * hidden_size}); - - const auto lstm_direction = opset5::LSTMSequence::direction::BIDIRECTIONAL; - const std::vector activations_alpha = {2.7f, 7.0f, 32.367f}; - const std::vector activations_beta = {0.0f, 5.49f, 6.0f}; - const std::vector activations = {"tanh", "sigmoid", "sigmoid"}; - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto lstm_sequence = make_shared(X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - hidden_size, - lstm_direction, - weights_format, - activations_alpha, - activations_beta, - activations, - clip_threshold, - input_forget); - - EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); - EXPECT_EQ(lstm_sequence->get_direction(), opset5::LSTMSequence::direction::BIDIRECTIONAL); - EXPECT_EQ(lstm_sequence->get_activations_alpha(), activations_alpha); - EXPECT_EQ(lstm_sequence->get_activations_beta(), activations_beta); - EXPECT_EQ(lstm_sequence->get_activations()[0], "tanh"); - EXPECT_EQ(lstm_sequence->get_activations()[1], "sigmoid"); - EXPECT_EQ(lstm_sequence->get_activations()[2], "sigmoid"); - EXPECT_EQ(lstm_sequence->get_clip_threshold(), 3.5f); - EXPECT_EQ(lstm_sequence->get_input_forget(), true); - EXPECT_EQ(lstm_sequence->get_weights_format(), ov::op::LSTMWeightsFormat::FICO); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(0), (Shape{batch_size, num_directions, seq_length, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); - OPENVINO_SUPPRESS_DEPRECATED_END -} - 
TEST(type_prop, lstm_sequence_dynamic_batch_size) { recurrent_sequence_parameters param; @@ -573,214 +425,3 @@ TEST(type_prop, lstm_sequence_invalid_input_direction_num_mismatch) { check_error(op::RecurrentSequenceDirection::FORWARD, 2); check_error(op::RecurrentSequenceDirection::REVERSE, 2); } - -OPENVINO_SUPPRESS_DEPRECATED_START -TEST(type_prop, lstm_sequence_v0_dynamic_num_directions) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = Dimension::dynamic(); - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, 1, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), (PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), (PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_dynamic_seq_length) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = Dimension::dynamic(); - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - 
EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_dynamic_hidden_size) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = Dimension::dynamic(); - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_dynamic_inputs) { - recurrent_sequence_parameters param; - - param.batch_size = Dimension::dynamic(); - param.input_size = Dimension::dynamic(); - param.hidden_size = Dimension::dynamic(); - param.num_directions = Dimension::dynamic(); - param.seq_length = Dimension::dynamic(); - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->validate_and_infer_types(); - - EXPECT_EQ(lstm_sequence->get_output_partial_shape(0), - (PartialShape{param.batch_size, 1, param.seq_length, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(1), 
(PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_partial_shape(2), (PartialShape{param.batch_size, 1, param.hidden_size})); - EXPECT_EQ(lstm_sequence->get_output_element_type(0), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(1), param.et); - EXPECT_EQ(lstm_sequence->get_output_element_type(2), param.et); -} - -TEST(type_prop, lstm_sequence_v0_invalid_input_dimension) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 2; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - auto invalid_rank0_tensor = make_shared(param.et, PartialShape{}); - - // Validate invalid rank0 tensor for all inputs: X, initial_hidden_state, initial_cell_state W, - // R, B - for (size_t i = 0; i < lstm_sequence->get_input_size(); i++) { - lstm_sequence = lstm_seq_v0_tensor_initialization(param); - lstm_sequence->set_argument(i, invalid_rank0_tensor); - ASSERT_THROW(lstm_sequence->validate_and_infer_types(), ov::AssertFailure) - << "LSTMSequence node was created with invalid data."; - } -} - -TEST(type_prop, lstm_sequence_v0_input_dynamic_rank) { - recurrent_sequence_parameters param; - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto op = lstm_seq_tensor_initialization(param); - auto dynamic_tensor = make_shared(param.et, PartialShape::dynamic(Rank::dynamic())); - - for (size_t i = 0; i < op->get_input_size(); i++) { - auto op = lstm_seq_v0_tensor_initialization(param); - op->set_argument(i, dynamic_tensor); - op->validate_and_infer_types(); - if (i == 0) { // X input - EXPECT_EQ(op->get_output_partial_shape(0), - (PartialShape{param.batch_size, param.num_directions, -1, param.hidden_size})); - } else { - EXPECT_EQ(op->get_output_partial_shape(0), - 
(PartialShape{param.batch_size, param.num_directions, param.seq_length, param.hidden_size})); - } - EXPECT_EQ(op->get_output_partial_shape(1), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - EXPECT_EQ(op->get_output_partial_shape(2), - (PartialShape{param.batch_size, param.num_directions, param.hidden_size})); - - EXPECT_EQ(op->get_output_element_type(0), param.et); - EXPECT_EQ(op->get_output_element_type(1), param.et); - } -} - -TEST(type_prop, lstm_sequence_v0_invalid_input_direction) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 3; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING( - error.what(), - std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); - } - - param.num_directions = 2; // 2 is also not allowed for default 'm_direction' = FORWARD - lstm_sequence = lstm_seq_v0_tensor_initialization(param); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING( - error.what(), - std::string("Dimension `num_directions` doesn't match to other inputs or `direction` attribute")); - } -} - -TEST(type_prop, lstm_sequence_v0_invalid_input_P) { - recurrent_sequence_parameters param; - - param.batch_size = 24; - param.num_directions = 1; - param.seq_length = 12; - param.input_size = 8; - param.hidden_size = 256; - param.et = element::f32; - - auto lstm_sequence = lstm_seq_v0_tensor_initialization(param); - auto P = make_shared(element::f32, PartialShape{param.hidden_size * 5}); - lstm_sequence->set_argument(7, P); - 
try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), "Input tensor P should have rank equal 2"); - } - P = make_shared(element::f32, PartialShape{param.num_directions, param.hidden_size * 5}); - lstm_sequence->set_argument(7, P); - try { - lstm_sequence->validate_and_infer_types(); - FAIL() << "LSTMSequence node was created with invalid data."; - } catch (const NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), "Inorrect shape of P input. Second dimension is: 1280, expected: 768"); - } - OPENVINO_SUPPRESS_DEPRECATED_END -} diff --git a/src/core/tests/type_prop/range.cpp b/src/core/tests/type_prop/range.cpp index c37f5987047a53..d44fe3c3bcc6b8 100644 --- a/src/core/tests/type_prop/range.cpp +++ b/src/core/tests/type_prop/range.cpp @@ -895,3 +895,21 @@ INSTANTIATE_TEST_SUITE_P(type_prop, RangeParams{-1, 1, 0.25, PartialShape{8}}, RangeParams{-1, 0.875, 0.25, PartialShape{8}}), PrintToDummyParamName()); + +TEST(type_prop, range_symbol_start_0_stop_A_step_1) { + auto stop_symbol = std::make_shared(); + auto source_shape = PartialShape::dynamic(1); + source_shape[0].set_symbol(stop_symbol); + auto symbol_source = + make_shared(make_shared(element::i64, source_shape)); + + auto start = make_shared(element::i64, Shape{}, 0); + auto stop = make_shared(symbol_source, + make_shared(element::i64, Shape{}, 0), + make_shared(element::i64, Shape{}, 0)); + auto step = make_shared(element::i64, Shape{}, 1); + + auto range = make_shared(start, stop, step); + + ASSERT_TRUE(ov::symbol::are_equal(range->get_output_partial_shape(0)[0].get_symbol(), stop_symbol)); +} diff --git a/src/core/tests/type_prop/reshape.cpp b/src/core/tests/type_prop/reshape.cpp index 7ed87ce4720ab2..3a16c5835c57f8 100644 --- a/src/core/tests/type_prop/reshape.cpp +++ b/src/core/tests/type_prop/reshape.cpp @@ -1333,3 +1333,24 @@ TEST(type_prop, 
reshape_pattern_dim_has_invalid_bound) { EXPECT_EQ(reshape->get_output_partial_shape(0), ov::PartialShape({7, -1, 1, 2})); } + +TEST(type_prop, reshape_symbol_deducing) { + auto A = std::make_shared(); + auto B = std::make_shared(); + auto C = std::make_shared(); + + auto in_shape = ov::PartialShape({-1, -1, 768}); + in_shape[0].set_symbol(A); + in_shape[1].set_symbol(B); + + auto out_shape = ov::PartialShape({-1, -1, 12, 64}); + out_shape[0].set_symbol(A); + out_shape[1].set_symbol(C); + + const auto in = std::make_shared(element::f32, in_shape); + const auto out = std::make_shared(std::make_shared(element::f32, out_shape)); + const auto reshape = std::make_shared(in, out, false); + + EXPECT_EQ(reshape->get_output_partial_shape(0), ov::PartialShape({-1, -1, 12, 64})); + EXPECT_TRUE(ov::symbol::are_equal(B, C)); +} \ No newline at end of file diff --git a/src/core/tests/type_prop/rnn_seq_base.cpp b/src/core/tests/type_prop/rnn_seq_base.cpp index 9b3c1b7f8f25df..db38622333d67f 100644 --- a/src/core/tests/type_prop/rnn_seq_base.cpp +++ b/src/core/tests/type_prop/rnn_seq_base.cpp @@ -83,11 +83,7 @@ class RNNSeqBaseTest : public TypePropOpTest { return std::make_shared(X, H_t, sequence_lengths, W, R, B, p.hidden_size.get_max_length(), p.direction); } - OPENVINO_SUPPRESS_DEPRECATED_START - template < - typename T = TOp, - typename std::enable_if::value || std::is_same::value, - bool>::type = true> + template ::value, bool>::type = true> std::shared_ptr make_rnn_seq_based_op(RNNSeqParams& p, bool use_default_ctor = false) { p.gates_count = 4; p.outputs_size = 3; @@ -109,19 +105,12 @@ class RNNSeqBaseTest : public TypePropOpTest { op->set_direction(p.direction); op->set_hidden_size(p.hidden_size.get_max_length()); auto inputs = OutputVector{X, H_t, C_t, sequence_lengths, W, R, B}; - if (ov::is_type(op)) { - const auto P = - make_shared(p.et, - PartialShape{p.num_directions, p.hidden_size * (p.gates_count - 1)}); - inputs.push_back(P); - } op->set_arguments(inputs); 
op->validate_and_infer_types(); return op; } return std::make_shared(X, H_t, C_t, sequence_lengths, W, R, B, p.hidden_size.get_max_length(), p.direction); } - OPENVINO_SUPPRESS_DEPRECATED_END }; TYPED_TEST_SUITE_P(RNNSeqBaseTest); @@ -284,9 +273,7 @@ REGISTER_TYPED_TEST_SUITE_P(RNNSeqBaseTest, interval_symbols_dims_shape_infer_REVERSE, interval_symbols_dims_shape_infer_BIDIRECTIONAL); -OPENVINO_SUPPRESS_DEPRECATED_START -using RNNSeqBaseTypes = Types; -OPENVINO_SUPPRESS_DEPRECATED_END +using RNNSeqBaseTypes = Types; INSTANTIATE_TYPED_TEST_SUITE_P(type_prop, RNNSeqBaseTest, RNNSeqBaseTypes); } // namespace rnn_seq_test diff --git a/src/core/tests/type_prop/squeeze.cpp b/src/core/tests/type_prop/squeeze.cpp index c7d81fd97c2786..7be05de1876d9f 100644 --- a/src/core/tests/type_prop/squeeze.cpp +++ b/src/core/tests/type_prop/squeeze.cpp @@ -7,193 +7,261 @@ #include "common_test_utils/test_assertions.hpp" #include "common_test_utils/type_prop.hpp" #include "openvino/op/broadcast.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/shape_of.hpp" -#include "openvino/op/unsqueeze.hpp" #include "sequence_generator.hpp" using namespace std; using namespace ov; using namespace testing; -TEST(type_prop, squeeze_axes_invalid_value) { +namespace { + +template +class SqueezelOperator : public TypePropOpTest {}; + +using SqueezeTypes = ::testing::Types; + +TYPED_TEST_SUITE(SqueezelOperator, SqueezeTypes); + +TYPED_TEST(SqueezelOperator, squeeze_axes_invalid_value) { auto param = make_shared(element::f32, Shape{1, 2, 3, 4}); auto axes_node = make_shared(element::u64, Shape{2}, vector{0, 2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), (PartialShape{2, 3, 4})); } -TEST(type_prop, squeeze_single_input) { +TYPED_TEST(SqueezelOperator, squeeze_single_input) { auto param 
= make_shared(element::f32, PartialShape{1, -1, 3, 4}); - auto s = make_shared(param); - EXPECT_EQ(s->get_output_partial_shape(0), PartialShape::dynamic()); + const auto squeeze = this->make_op(param); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_axes_invalid_rank) { +TYPED_TEST(SqueezelOperator, squeeze_axes_invalid_rank) { auto param = make_shared(element::f32, Shape{1, 2, 3, 4}); auto axes_node = make_shared(element::i32, Shape{2, 1}, vector{0, 2}); - OV_EXPECT_THROW(auto s = make_shared(param, axes_node), + OV_EXPECT_THROW(const auto squeeze = this->make_op(param, axes_node), NodeValidationFailure, HasSubstr("Second input (axes) should not be of rank higher than 1.")); } -TEST(type_prop, squeeze_incorrect_negative_axes) { +TYPED_TEST(SqueezelOperator, squeeze_incorrect_negative_axes) { auto param = make_shared(element::f32, Shape{1, 4, 1, 4, 1, 8}); auto axes_node = make_shared(element::i64, Shape{2}, vector{-6, -10}); - OV_EXPECT_THROW(auto s = make_shared(param, axes_node), + OV_EXPECT_THROW(const auto squeeze = this->make_op(param, axes_node), ov::Exception, HasSubstr("Axis -10 out of the tensor rank range")); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_no_squeezable_dims) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_single_elem_static_shape_no_squeezable_dims) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 2, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), (PartialShape{2, 2, 4})); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_two) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_two) { 
auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(2)); } -TEST(type_prop, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_one) { + auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, 
axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(2)); } -TEST(type_prop, squeeze_data_scalar_param_axes_1D_single_elem_static_shape) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_one) { + auto param = std::make_shared(ov::element::f32, PartialShape{2, 1, 4}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_scalar_param_axes_1D_single_elem_static_shape) { auto param = std::make_shared(ov::element::f32, PartialShape{}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_equal) { +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_equal) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = 
std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { +TYPED_TEST(SqueezelOperator, squeeze_data_static_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(4)); } -TEST(type_prop, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + 
EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(4)); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_static_param_axes_scalar_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{1, 2, 1, 3, 1}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_two_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = 
std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } -TEST(type_prop, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } -TEST(type_prop, squeeze_data_dyamic_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_scalar_static_shape_squeezable_dims_more) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, PartialShape{}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TYPED_TEST(SqueezelOperator, 
squeeze_data_dyamic_param_axes_1D_two_elem_static_shape_squeezable_dims_one) { auto param = std::make_shared(ov::element::f32, PartialShape{2, -1, 4}); const auto axes_node = std::make_shared(element::u64, PartialShape{2}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_three_elem_static_shape_squeezable_dims_two) { +TYPED_TEST(SqueezelOperator, squeeze_data_dynamic_param_axes_1D_three_elem_static_shape_squeezable_dims_two) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{3}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = this->make_op(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); } -TEST(type_prop, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { +TEST(TypePropSqueezelOperatorV0, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); const auto axes_node = std::make_shared(element::u64, PartialShape{1}); - const auto squeeze = std::make_shared(param, axes_node); + const auto squeeze = std::make_shared(param, axes_node); EXPECT_EQ(squeeze->get_element_type(), element::f32); EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic(3)); } +TEST(TypePropSqueezelOperatorV15, squeeze_data_dynamic_param_axes_1D_single_elem_static_shape_squeezable_dims_less) { + auto param = std::make_shared(ov::element::f32, PartialShape{-1, {2, 8}, {1, 3}, {4, -1}}); + const auto axes_node = std::make_shared(element::u64, 
PartialShape{1}); + const auto squeeze = std::make_shared(param, axes_node); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); +} + using SqueezeTypePropTestParam = std::tuple, // Squeeze axis PartialShape // Expected shape @@ -288,26 +356,44 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_default_axes, TEST_P(SqueezeTest, partial_shape_dimension_propagation_const_axis_i32) { const auto axes_node = std::make_shared(element::i32, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } TEST_P(SqueezeTest, partial_shape_dimension_propagation_parameter_axes_no_data) { const auto axes_node = std::make_shared(element::u64, PartialShape{Shape{axes.size()}}); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_TRUE(squeeze->get_output_partial_shape(0).compatible(exp_shape)); + } } TEST_P(SqueezeTest, partial_shape_dimension_propagation_dynamic_axes) { const auto axes_node = std::make_shared(element::u64, 
PartialShape::dynamic()); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), PartialShape::dynamic()); + } } TEST_P(SqueezeTest, symbols_propagation) { @@ -321,9 +407,14 @@ TEST_P(SqueezeTest, symbols_propagation) { param = make_shared(element::f32, p_shape); const auto axes_node = std::make_shared(element::i32, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(get_shape_symbols(squeeze->get_output_partial_shape(0)), exp_symbols); + } } using SqueezeShapeTests = SqueezeTest; @@ -336,10 +427,16 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_no_axes, TEST_P(SqueezeShapeTests, shape_dimension_propagation_const_axis_i64) { param = std::make_shared(element::f64, p_shape.to_shape()); const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f64); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f64); + EXPECT_EQ(squeeze->get_output_partial_shape(0), 
exp_shape.to_shape()); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f64); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape.to_shape()); + } } using SqueezeNoAxesTest = SqueezeTest; @@ -350,10 +447,16 @@ INSTANTIATE_TEST_SUITE_P(type_prop_shrink_shape_no_axes, PrintToStringParamName()); TEST_P(SqueezeNoAxesTest, partial_shape_dimension_propagation_no_axes) { - const auto squeeze = std::make_shared(param); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } using SqueezeScalarAxisTest = SqueezeTest; @@ -368,25 +471,35 @@ INSTANTIATE_TEST_SUITE_P( TEST_P(SqueezeScalarAxisTest, axis_value_as_vector) { const auto axes_node = std::make_shared(element::i32, Shape{}, axes); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } TEST_P(SqueezeScalarAxisTest, axis_value_as_integer) { const auto axes_node = std::make_shared(element::i32, Shape{}, axes.front()); - const auto squeeze = std::make_shared(param, axes_node); - - EXPECT_EQ(squeeze->get_element_type(), element::f32); - 
EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } + { + const auto squeeze = std::make_shared(param, axes_node); + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + } } using SqueezeBoundTest = UnSqueezeBoundTest; -INSTANTIATE_TEST_SUITE_P( - type_prop_bounds_propagate, - SqueezeBoundTest, +const auto test_values_in = Values(std::make_tuple(PartialShape::dynamic(6), PartialShape::dynamic(1)), std::make_tuple(PartialShape{Dimension(-1)}, PartialShape{Dimension(-1)}), std::make_tuple(PartialShape{Dimension::dynamic(), 8}, PartialShape{Dimension::dynamic()}), @@ -394,34 +507,136 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(PartialShape{Dimension(20, -1), Dimension::dynamic()}, PartialShape{{20, -1}}), std::make_tuple(PartialShape{Dimension(-1, 5), Dimension::dynamic()}, PartialShape{Dimension(-1, 5)}), std::make_tuple(PartialShape{15}, PartialShape{15}), - std::make_tuple(PartialShape{2, 6}, PartialShape{2})), - PrintToStringParamName()); + std::make_tuple(PartialShape{2, 6}, PartialShape{2})); + +INSTANTIATE_TEST_SUITE_P(type_prop_bounds_propagate, SqueezeBoundTest, test_values_in, PrintToStringParamName()); /** * \brief Check symbol and dynamic value propagation. * * Test use evaluate symbol, lower/upper. 
*/ -TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value) { +TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value_squeeze_v0) { PartialShape symboled_shape = PartialShape{p_shape}; in_symbols = set_shape_symbols(symboled_shape); - constexpr auto et = element::i64; - const auto symboled_param = std::make_shared(et, symboled_shape); - const auto symboled_shape_of = std::make_shared(symboled_param); + const auto squeeze = create_squeeze(symboled_shape); + const auto bc = std::make_shared(param, squeeze); + + EXPECT_EQ(bc->get_output_partial_shape(0), exp_shape); + const auto symbols = get_shape_symbols(bc->get_output_partial_shape(0)); + EXPECT_THAT(symbols, ElementsAre(in_symbols.front())); +} - const auto zero = std::vector{0}; - const auto axis = std::make_shared(et, Shape{}, zero); - const auto indices = std::make_shared(et, Shape{}, zero); - const auto gather = std::make_shared(symboled_shape_of, indices, axis); - const auto axis_1 = std::make_shared(et, Shape{2}, std::vector{0, 1}); - const auto unsqueeze = std::make_shared(gather, axis_1); - const auto squeeze = std::make_shared(unsqueeze, axis); +/** + * \brief Check symbol and dynamic value propagation. + * + * Test use evaluate symbol, lower/upper. 
+ */ +TEST_P(SqueezeBoundTest, propagate_symbol_and_dynamic_value_squeeze_v15) { + PartialShape symboled_shape = PartialShape{p_shape}; + + in_symbols = set_shape_symbols(symboled_shape); + const auto squeeze = create_squeeze(symboled_shape); const auto bc = std::make_shared(param, squeeze); EXPECT_EQ(bc->get_output_partial_shape(0), exp_shape); const auto symbols = get_shape_symbols(bc->get_output_partial_shape(0)); EXPECT_THAT(symbols, ElementsAre(in_symbols.front())); } + +using SqueezeAxesDynamicRankTestParam = decltype(std::tuple_cat(SqueezeTypePropTestParam{}, std::make_tuple(false))); +class SqueezeAxesDynamicRank : public ::testing::TestWithParam { +protected: + ov::PartialShape p_shape{}, exp_shape{}; + std::vector axes{}; + bool allow_axis_skip{}; +}; + +INSTANTIATE_TEST_SUITE_P( + SqueezeAxesDynamicRankTests, + SqueezeAxesDynamicRank, + ::testing::Values( + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{}, PartialShape::dynamic(), false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0}, PartialShape{2, -1, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0}, PartialShape{2, -1, 4}, true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{2}, PartialShape{1, 2, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{2}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0, 2}, PartialShape{2, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{0, 2}, PartialShape::dynamic(), true), + + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{1}, PartialShape{1, 2, -1, 4}, false), + std::make_tuple(PartialShape{1, 2, -1, 4}, std::vector{1}, PartialShape{1, 2, -1, 4}, true), + + std::make_tuple(PartialShape{2, 4}, std::vector{1}, PartialShape{2, 4}, false), + std::make_tuple(PartialShape{2, 4}, std::vector{1}, PartialShape{2, 4}, true), + + 
std::make_tuple(PartialShape{2, {3, 5}}, std::vector{}, PartialShape{2, {3, 5}}, false), + std::make_tuple(PartialShape{2, {3, 5}}, std::vector{}, PartialShape{2, {3, 5}}, true), + + std::make_tuple(PartialShape{1, 2, -1}, std::vector{0, 1}, PartialShape{2, -1}, false), + std::make_tuple(PartialShape{1, 2, -1}, std::vector{0, 1}, PartialShape{2, -1}, true), + + std::make_tuple(PartialShape{1, 2, -1}, std::vector{1}, PartialShape{1, 2, -1}, false), + std::make_tuple(PartialShape{1, 2, -1}, std::vector{1}, PartialShape{1, 2, -1}, true), + + std::make_tuple(PartialShape{1, 1, -1}, std::vector{0, 1}, PartialShape{-1}, false), + std::make_tuple(PartialShape{1, 1, -1}, std::vector{0, 1}, PartialShape{-1}, true), + + std::make_tuple(PartialShape{1, 1, -1}, std::vector{1}, PartialShape{1, -1}, false), + std::make_tuple(PartialShape{1, 1, -1}, std::vector{1}, PartialShape{1, -1}, true), + + std::make_tuple(PartialShape{1, 2, 3}, std::vector{}, PartialShape{2, 3}, false), + std::make_tuple(PartialShape{1, 2, 3}, std::vector{}, PartialShape{2, 3}, true))); + +TEST_P(SqueezeAxesDynamicRank, squeeze_axes_dynamic_rank_param) { + const auto& params = GetParam(); + p_shape = std::get<0>(params); + axes = std::get<1>(params); + exp_shape = std::get<2>(params); + allow_axis_skip = std::get<3>(params); + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::u64, Shape{axes.size()}, axes); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +TEST(SqueezeDynamicAxis, squeeze_dynamic_non_const_single_axis) { + auto p_shape = PartialShape{1, 2, -1, 4}; + auto exp_shape = PartialShape::dynamic(); + auto allow_axis_skip = true; + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::i32, Shape{1}); + const 
auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +TEST(SqueezeDynamicAxis, squeeze_dynamic_non_const_axes) { + auto p_shape = PartialShape{1, 2, -1, 4}; + auto exp_shape = PartialShape::dynamic(); + auto allow_axis_skip = true; + + auto param = make_shared(element::f32, p_shape); + auto axes_node = make_shared(element::i32, PartialShape{-1}); + const auto squeeze = std::make_shared(param, axes_node, allow_axis_skip); + + EXPECT_EQ(squeeze->get_element_type(), element::f32); + EXPECT_EQ(squeeze->get_output_partial_shape(0), exp_shape); + EXPECT_EQ(squeeze->get_allow_axis_skip(), allow_axis_skip); +} + +} // namespace diff --git a/src/core/tests/type_prop/stft.cpp b/src/core/tests/type_prop/stft.cpp index 60fe9597032714..2969af4e5a43bd 100644 --- a/src/core/tests/type_prop/stft.cpp +++ b/src/core/tests/type_prop/stft.cpp @@ -104,6 +104,14 @@ INSTANTIATE_TEST_SUITE_P( type_prop_stft_shape, TypePropSTFTTestP, testing::Values( + std::make_tuple(PartialShape{16}, PartialShape{16}, 16, 16, true, PartialShape{9, 1, 2}), + std::make_tuple(PartialShape{48}, PartialShape{16}, 16, 16, false, PartialShape{3, 9, 2}), + std::make_tuple(PartialShape{56}, PartialShape{7}, 11, 3, false, PartialShape{16, 6, 2}), + std::make_tuple(PartialShape{56}, PartialShape{7}, 11, 3, true, PartialShape{6, 16, 2}), + std::make_tuple(PartialShape{48}, PartialShape{8}, 16, 4, true, PartialShape{9, 9, 2}), + std::make_tuple(PartialShape{{48, 56}}, PartialShape{7}, 11, 3, true, PartialShape{6, {13, 16}, 2}), + std::make_tuple(PartialShape{-1}, PartialShape{7}, 11, 3, true, PartialShape{6, {1, -1}, 2}), + std::make_tuple(PartialShape{1, 16}, PartialShape{16}, 16, 16, true, PartialShape{1, 9, 1, 2}), std::make_tuple(PartialShape{1, 48}, PartialShape{16}, 16, 16, true, PartialShape{1, 9, 3, 2}), 
std::make_tuple(PartialShape{1, 48}, PartialShape{16}, 16, 16, false, PartialShape{1, 3, 9, 2}), std::make_tuple(PartialShape{2, 48}, PartialShape{8}, 16, 4, true, PartialShape{2, 9, 9, 2}), @@ -138,16 +146,16 @@ TEST_F(TypePropSTFTTest, signal_incompatible_shape) { const auto frame_size = std::make_shared(element::i64, PartialShape{}); const auto frame_step = std::make_shared(element::i64, PartialShape{}); { - const auto signal = std::make_shared(element::f32, PartialShape{48}); + const auto signal = std::make_shared(element::f32, PartialShape{}); OV_EXPECT_THROW(std::ignore = make_op(signal, window, frame_size, frame_step, transform_frames), NodeValidationFailure, - HasSubstr("The shape of signal must be 2D [batch, signal_size]")); + HasSubstr("The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]")); } { const auto signal = std::make_shared(element::f32, PartialShape{-1, 4, 48}); OV_EXPECT_THROW(std::ignore = make_op(signal, window, frame_size, frame_step, transform_frames), NodeValidationFailure, - HasSubstr("The shape of signal must be 2D [batch, signal_size]")); + HasSubstr("The shape of signal must be 1D [signal_size] or 2D [batch, signal_size]")); } } diff --git a/src/core/tests/visitors/op/identity.cpp b/src/core/tests/visitors/op/identity.cpp new file mode 100644 index 00000000000000..f5d66e0b4586ae --- /dev/null +++ b/src/core/tests/visitors/op/identity.cpp @@ -0,0 +1,11 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/identity.hpp" + +#include "unary_ops.hpp" + +using Type = ::testing::Types>; + +INSTANTIATE_TYPED_TEST_SUITE_P(visitor_without_attribute, UnaryOperatorVisitor, Type, UnaryOperatorTypeName); diff --git a/src/core/tests/visitors/op/lstm_sequence.cpp b/src/core/tests/visitors/op/lstm_sequence.cpp index 0d386ba7bc4ccf..f9819d9ae84bc9 100644 --- a/src/core/tests/visitors/op/lstm_sequence.cpp +++ b/src/core/tests/visitors/op/lstm_sequence.cpp @@ -61,63 +61,3 
@@ TEST(attributes, lstm_sequence_op) { EXPECT_EQ(g_lstm_sequence->get_clip(), lstm_sequence->get_clip()); EXPECT_EQ(g_lstm_sequence->get_direction(), lstm_sequence->get_direction()); } - -OPENVINO_SUPPRESS_DEPRECATED_START -TEST(attributes, lstm_sequence_v1_op) { - NodeBuilder::opset().insert(); - - const size_t batch_size = 4; - const size_t num_directions = 2; - const size_t seq_length = 8; - const size_t input_size = 16; - const size_t hidden_size = 64; - - const auto X = make_shared(element::f32, Shape{batch_size, seq_length, input_size}); - const auto initial_hidden_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto initial_cell_state = - make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); - const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); - const auto W = make_shared(element::f32, Shape{num_directions, 4 * hidden_size, input_size}); - const auto R = - make_shared(element::f32, Shape{num_directions, 4 * hidden_size, hidden_size}); - const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); - const auto P = make_shared(element::f32, Shape{num_directions, 3 * hidden_size}); - - const auto lstm_direction = op::RecurrentSequenceDirection::BIDIRECTIONAL; - const ov::op::LSTMWeightsFormat weights_format = ov::op::LSTMWeightsFormat::FICO; - const std::vector activations_alpha = {1, 2, 3}; - const std::vector activations_beta = {4, 5, 6}; - const std::vector activations = {"tanh", "sigmoid", "tanh"}; - const float clip_threshold = 0.5f; - const bool input_forget = true; - - const auto lstm_sequence = make_shared(X, - initial_hidden_state, - initial_cell_state, - sequence_lengths, - W, - R, - B, - P, - hidden_size, - lstm_direction, - weights_format, - activations_alpha, - activations_beta, - activations, - clip_threshold, - input_forget); - NodeBuilder builder(lstm_sequence, {X, initial_hidden_state, initial_cell_state, sequence_lengths, W, R, B, 
P}); - auto g_lstm_sequence = ov::as_type_ptr(builder.create()); - - EXPECT_EQ(g_lstm_sequence->get_hidden_size(), lstm_sequence->get_hidden_size()); - EXPECT_EQ(g_lstm_sequence->get_activations(), lstm_sequence->get_activations()); - EXPECT_EQ(g_lstm_sequence->get_activations_alpha(), lstm_sequence->get_activations_alpha()); - EXPECT_EQ(g_lstm_sequence->get_activations_beta(), lstm_sequence->get_activations_beta()); - EXPECT_EQ(g_lstm_sequence->get_clip_threshold(), lstm_sequence->get_clip_threshold()); - EXPECT_EQ(g_lstm_sequence->get_direction(), lstm_sequence->get_direction()); - EXPECT_EQ(g_lstm_sequence->get_input_forget(), lstm_sequence->get_input_forget()); - EXPECT_EQ(g_lstm_sequence->get_weights_format(), lstm_sequence->get_weights_format()); -} -OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/core/tests/visitors/op/sorted_search.cpp b/src/core/tests/visitors/op/sorted_search.cpp index 860c9528d0e9aa..10d544527f3714 100644 --- a/src/core/tests/visitors/op/sorted_search.cpp +++ b/src/core/tests/visitors/op/sorted_search.cpp @@ -22,7 +22,7 @@ TEST(attributes, search_sorted_op) { auto g_op = ov::as_type_ptr(builder.create()); // attribute count - const auto expected_attr_count = 1; + const auto expected_attr_count = 2; EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); // space_to_depth attributes diff --git a/src/core/tests/visitors/op/squeeze.cpp b/src/core/tests/visitors/op/squeeze.cpp index 6eb1674b26329a..be596a5fb1dc67 100644 --- a/src/core/tests/visitors/op/squeeze.cpp +++ b/src/core/tests/visitors/op/squeeze.cpp @@ -6,7 +6,16 @@ #include "unary_ops.hpp" +namespace v0 { using Types = ::testing::Types, UnaryOperatorType>; INSTANTIATE_TYPED_TEST_SUITE_P(visitor_without_attribute, UnaryOperatorVisitor, Types, UnaryOperatorTypeName); +} // namespace v0 + +namespace v15 { +using Types = ::testing::Types, + UnaryOperatorTypeWithAttribute>; + +INSTANTIATE_TYPED_TEST_SUITE_P(visitor_single_attribute, UnaryOperatorVisitor, Types, 
UnaryOperatorTypeName); +} // namespace v15 diff --git a/src/core/tests/visitors/op/unary_ops.hpp b/src/core/tests/visitors/op/unary_ops.hpp index 3bef2429983e9f..6cc2afda62e253 100644 --- a/src/core/tests/visitors/op/unary_ops.hpp +++ b/src/core/tests/visitors/op/unary_ops.hpp @@ -9,12 +9,17 @@ #include "openvino/op/parameter.hpp" #include "visitors/visitors.hpp" -template +template class UnaryOperatorType { public: using op_type = T; static constexpr ov::element::Type_t element_type = ELEMENT_TYPE; + static constexpr int expected_attr_count = ATTRIBUTES_COUNT; }; + +template +using UnaryOperatorTypeWithAttribute = UnaryOperatorType; + template class UnaryOperatorVisitor : public testing::Test {}; @@ -43,7 +48,7 @@ TYPED_TEST_P(UnaryOperatorVisitor, No_Attribute_4D) { EXPECT_NO_THROW(auto g_op_func = ov::as_type_ptr(builder.create())); - const auto expected_attr_count = 0; + const auto expected_attr_count = TypeParam::expected_attr_count; EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); } diff --git a/src/frontends/common/include/openvino/frontend/extension/op.hpp b/src/frontends/common/include/openvino/frontend/extension/op.hpp index 4198c411082e42..9022b9f801f800 100644 --- a/src/frontends/common/include/openvino/frontend/extension/op.hpp +++ b/src/frontends/common/include/openvino/frontend/extension/op.hpp @@ -25,7 +25,7 @@ inline const ov::OpSet& get_opset_by_name(const std::string& opset_name) { if (opsets.find(opset_name) != opsets.end()) return opsets.at(opset_name)(); if (opset_name.empty() || opset_name == "latest") { - return ov::get_opset14(); // TODO (ticket 138273): Update at the end of the opset15 development + return ov::get_opset15(); // TODO (ticket: 156877): Update to 16 at the end of opset16 development } else { FRONT_END_GENERAL_CHECK(false, "Unsupported opset name: ", opset_name); } diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index db979a35d932af..c5e137e1decc89 100644 --- 
a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -168,15 +168,21 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const return exts; }; - auto create_input_model = [&]() -> std::shared_ptr { + auto create_input_model = [&](std::string weights_path) -> std::shared_ptr { if (provided_model_stream) { - return std::make_shared(*provided_model_stream, weights, create_extensions_map()); + return std::make_shared(*provided_model_stream, + weights, + create_extensions_map(), + std::move(weights_path)); } else if (local_model_stream.is_open()) { - auto input_model = std::make_shared(local_model_stream, weights, create_extensions_map()); + auto input_model = std::make_shared(local_model_stream, + weights, + create_extensions_map(), + std::move(weights_path)); local_model_stream.close(); return input_model; } else if (model_buf) { - return std::make_shared(model_buf, weights, create_extensions_map()); + return std::make_shared(model_buf, weights, create_extensions_map(), std::move(weights_path)); } return nullptr; }; @@ -278,7 +284,7 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const } } - return create_input_model(); + return create_input_model(ov::util::path_to_string(weights_path)); } std::shared_ptr FrontEnd::convert(const InputModel::Ptr& model) const { diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 6c59617c69a48d..b4d9ef164e994c 100644 --- a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -205,13 +205,16 @@ class InputModel::InputModelIRImpl { std::unordered_map m_opsets; pugi::xml_node m_root; pugi::xml_document m_xml_doc; + std::string m_weights_path; public: InputModelIRImpl(std::istream& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) + const std::unordered_map& extensions, + std::string weights_path) : m_weights(weights), - m_extensions(extensions) { + m_extensions(extensions), + 
m_weights_path(std::move(weights_path)) { pugi::xml_parse_result res = m_xml_doc.load(model); OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); init_opset(); @@ -219,9 +222,11 @@ class InputModel::InputModelIRImpl { InputModelIRImpl(const std::shared_ptr& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) + const std::unordered_map& extensions, + std::string weights_path) : m_weights(weights), - m_extensions(extensions) { + m_extensions(extensions), + m_weights_path(std::move(weights_path)) { auto res = m_xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); init_opset(); @@ -240,14 +245,16 @@ class InputModel::InputModelIRImpl { InputModel::InputModel(std::istream& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) { - _impl = std::make_shared(model, weights, extensions); + const std::unordered_map& extensions, + std::string weights_path) { + _impl = std::make_shared(model, weights, extensions, std::move(weights_path)); } InputModel::InputModel(const std::shared_ptr& model, const std::shared_ptr& weights, - const std::unordered_map& extensions) { - _impl = std::make_shared(model, weights, extensions); + const std::unordered_map& extensions, + std::string weights_path) { + _impl = std::make_shared(model, weights, extensions, std::move(weights_path)); } std::shared_ptr InputModel::convert() { @@ -263,6 +270,8 @@ std::shared_ptr InputModel::InputModelIRImpl::convert() { std::shared_ptr model; visitor.on_attribute("net", model); model->get_rt_info()["version"] = int64_t(version); + if (!m_weights_path.empty()) + model->get_rt_info()["__weights_path"] = m_weights_path; parse_pre_process(m_root, m_weights, model); return model; diff --git a/src/frontends/ir/src/input_model.hpp b/src/frontends/ir/src/input_model.hpp index 
331092749bbeb9..a9ad1224c6ca3a 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -22,11 +22,13 @@ class InputModel : public ov::frontend::InputModel { public: InputModel(std::istream& stream, const std::shared_ptr& weights, - const std::unordered_map& extensions); + const std::unordered_map& extensions, + std::string weights_path = {}); InputModel(const std::shared_ptr& model_buf, const std::shared_ptr& weights, - const std::unordered_map& extensions); + const std::unordered_map& extensions, + std::string weights_path = {}); std::shared_ptr convert(); }; diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 68900b150514bc..7c8b6e9d4b97ab 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -9,6 +9,7 @@ #include "openvino/core/except.hpp" #include "openvino/core/meta_data.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/loop.hpp" @@ -944,6 +945,13 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(pugixml::get_uint64_attr(dn, "size")), + static_cast(pugixml::get_uint64_attr(dn, "offset"))); + } } ovNode->set_friendly_name(params.name); @@ -960,16 +968,9 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector - if (!getStrAttribute(item, "name", attribute_name)) { - std::stringstream ss; - item.print(ss); - OPENVINO_THROW("rt_info attribute has no \"name\" field: ", ss.str()); - } - if (!getStrAttribute(item, "version", attribute_version)) { - std::stringstream ss; - item.print(ss); - OPENVINO_THROW("rt_info attribute: ", attribute_name, " has no \"version\" field: ", ss.str()); - } + if (!getStrAttribute(item, "name", attribute_name) || !getStrAttribute(item, "version", attribute_version)) + continue; + const auto& type_info = 
ov::DiscreteTypeInfo(attribute_name.c_str(), attribute_version.c_str()); auto attr = attrs_factory.create_by_type_info(type_info); if (!attr.empty()) { diff --git a/src/frontends/ir/tests/rt_info_deserialization.cpp b/src/frontends/ir/tests/rt_info_deserialization.cpp index 4313b4d19be515..466db1291e674a 100644 --- a/src/frontends/ir/tests/rt_info_deserialization.cpp +++ b/src/frontends/ir/tests/rt_info_deserialization.cpp @@ -405,11 +405,15 @@ TEST_F(RTInfoDeserialization, node_v11) { + + + + 1 22 diff --git a/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp b/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp index 101161a4ec03c9..015713ad72847d 100644 --- a/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp +++ b/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp @@ -101,6 +101,7 @@ class NodeContext : public frontend::NodeContext { } Output get_param(const std::string& name) const { + FRONT_END_GENERAL_CHECK(m_param_name_to_id.count(name), "No param id corresponding name exists: ", name); auto id = m_param_name_to_id.at(name); FRONT_END_GENERAL_CHECK(m_tensor_map->count(id), "No tensor corresponding param id: ", id, " exist."); return m_tensor_map->at(id); diff --git a/src/frontends/jax/src/op/argmax.cpp b/src/frontends/jax/src/op/argmax.cpp new file mode 100644 index 00000000000000..60d852c6d0f358 --- /dev/null +++ b/src/frontends/jax/src/op/argmax.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/jax/node_context.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/topk.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace jax { +namespace op { + +using namespace ov::op; + +OutputVector translate_argmax(const NodeContext& context) { + num_inputs_check(context, 1, 1); + Output input = context.get_input(0); + auto axis_val = 
context.const_named_param("axes"); + auto axis = context.const_named_param>("axes"); + auto dtype = convert_dtype(context.const_named_param("index_dtype")); + + auto k = std::make_shared(element::i64, Shape{}, 1); + auto topk = std::make_shared(input, + k, + axis_val, + v11::TopK::Mode::MAX, + v1::TopK::SortType::SORT_VALUES, + dtype, + true); + auto indices = topk->output(1); + + auto res = std::make_shared(indices, axis); + return {res}; +}; + +} // namespace op +} // namespace jax +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/jax/src/op/erfc.cpp b/src/frontends/jax/src/op/erfc.cpp new file mode 100644 index 00000000000000..5a38577f868d35 --- /dev/null +++ b/src/frontends/jax/src/op/erfc.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/jax/node_context.hpp" +#include "openvino/op/erf.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace jax { +namespace op { + +OutputVector translate_erfc(const NodeContext& context) { + num_inputs_check(context, 1, 1); + auto x = context.get_input(0); + + // create const one of the same type as x + auto const_one = create_same_type_const_scalar(x, 1); + Output res = make_shared(x); + res = make_shared(const_one, res); + return {res}; +}; + +} // namespace op +} // namespace jax +} // namespace frontend +} // namespace ov diff --git a/src/frontends/jax/src/op_table.cpp b/src/frontends/jax/src/op_table.cpp index 5e92e3de6e212a..98f22452c5afab 100644 --- a/src/frontends/jax/src/op_table.cpp +++ b/src/frontends/jax/src/op_table.cpp @@ -36,6 +36,7 @@ namespace op { template \ OutputVector op(const ov::frontend::jax::NodeContext& node) +OP_CONVERTER(translate_argmax); OP_T_CONVERTER(translate_binary_op); OP_CONVERTER(translate_broadcast_in_dim); 
OP_CONVERTER(translate_concatenate); @@ -44,6 +45,7 @@ OP_CONVERTER(translate_convert); OP_CONVERTER(translate_convolution); OP_CONVERTER(translate_copy); OP_CONVERTER(translate_dot_general); +OP_CONVERTER(translate_erfc); OP_CONVERTER(translate_integer_pow); OP_T_CONVERTER(translate_reduce_op); OP_CONVERTER(translate_reduce_window_max); @@ -59,6 +61,7 @@ OP_CONVERTER(translate_transpose); // Supported ops for Jaxpr const std::map get_supported_ops_jaxpr() { return {{"add", op::translate_1to1_match_2_inputs}, + {"argmax", op::translate_argmax}, {"broadcast_in_dim", op::translate_broadcast_in_dim}, {"concatenate", op::translate_concatenate}, {"constant", op::translate_constant}, @@ -70,6 +73,7 @@ const std::map get_supported_ops_jaxpr() { {"dot_general", op::translate_dot_general}, {"eq", op::translate_binary_op}, {"erf", op::translate_1to1_match_1_input}, + {"erfc", op::translate_erfc}, {"exp", op::translate_1to1_match_1_input}, {"ge", op::translate_binary_op}, {"gt", op::translate_binary_op}, diff --git a/src/frontends/jax/src/utils.cpp b/src/frontends/jax/src/utils.cpp index d47abfbba56188..f626031ec8dc58 100644 --- a/src/frontends/jax/src/utils.cpp +++ b/src/frontends/jax/src/utils.cpp @@ -16,6 +16,7 @@ namespace jax { void num_inputs_check(const NodeContext& context, size_t min_inputs, size_t max_inputs) { auto inputs = context.inputs(); FRONT_END_OP_CONVERSION_CHECK(inputs.size() >= min_inputs, "Got less inputs than expected"); + FRONT_END_OP_CONVERSION_CHECK(inputs.size() <= max_inputs, "Got more inputs than expected"); } void num_inputs_check(const NodeContext& context, size_t min_inputs) { diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index ad755c958703cb..80ec9c6c9f390c 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -446,10 +446,6 @@ void Graph::set_friendly_names(const Node& onnx_node, const ov::OutputVector& ov // null node 
does not have tensor if (!ov::op::util::is_null(ov_subgraph_outputs[i])) { ov_subgraph_outputs[i].get_tensor().set_names({onnx_node.output(static_cast(i))}); - OPENVINO_SUPPRESS_DEPRECATED_START - ov::descriptor::set_ov_tensor_legacy_name(ov_subgraph_outputs[i].get_tensor(), - onnx_node.output(static_cast(i))); - OPENVINO_SUPPRESS_DEPRECATED_END } } } diff --git a/src/frontends/onnx/frontend/src/core/tensor.cpp b/src/frontends/onnx/frontend/src/core/tensor.cpp index 4f8f54e2d83690..b23f6c55253ac1 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.cpp +++ b/src/frontends/onnx/frontend/src/core/tensor.cpp @@ -82,18 +82,11 @@ std::vector Tensor::get_data() const { if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); } -#ifdef ONNX_VERSION_116 if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_INT8 || m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_INT4) { return detail::__get_data(m_tensor_proto->int32_data()); } ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "INT4, INT8, raw data"); -#else - if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_INT8) { - return detail::__get_data(m_tensor_proto->int32_data()); - } - ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "INT8, raw data"); -#endif } template <> @@ -146,18 +139,11 @@ std::vector Tensor::get_data() const { if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); } -#ifdef ONNX_VERSION_116 if (m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_UINT8 || m_tensor_proto->data_type() == TensorProto_DataType::TensorProto_DataType_UINT4) { return detail::__get_data(m_tensor_proto->int32_data()); } ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "UINT4, UINT8, raw data"); -#else - if (m_tensor_proto->data_type() == 
TensorProto_DataType::TensorProto_DataType_UINT8) { - return detail::__get_data(m_tensor_proto->int32_data()); - } - ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "UINT8, raw data"); -#endif } template <> diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index ae6fe28754b4e5..af4d299f9d45e7 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -65,10 +65,8 @@ class Tensor { enum class Type { undefined = TensorProto_DataType::TensorProto_DataType_UNDEFINED, float32 = TensorProto_DataType::TensorProto_DataType_FLOAT, -#ifdef ONNX_VERSION_116 uint4 = TensorProto_DataType::TensorProto_DataType_UINT4, int4 = TensorProto_DataType::TensorProto_DataType_INT4, -#endif uint8 = TensorProto_DataType::TensorProto_DataType_UINT8, int8 = TensorProto_DataType::TensorProto_DataType_INT8, uint16 = TensorProto_DataType::TensorProto_DataType_UINT16, @@ -146,10 +144,8 @@ class Tensor { return ov::element::f16; case TensorProto_DataType::TensorProto_DataType_DOUBLE: return ov::element::f64; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_INT4: return ov::element::i4; -#endif case TensorProto_DataType::TensorProto_DataType_INT8: return ov::element::i8; case TensorProto_DataType::TensorProto_DataType_INT16: @@ -158,10 +154,8 @@ class Tensor { return ov::element::i32; case TensorProto_DataType::TensorProto_DataType_INT64: return ov::element::i64; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_UINT4: return ov::element::u4; -#endif case TensorProto_DataType::TensorProto_DataType_UINT8: return ov::element::u8; case TensorProto_DataType::TensorProto_DataType_UINT16: @@ -205,10 +199,8 @@ class Tensor { return make_ov_constant(ov::element::f16); case TensorProto_DataType::TensorProto_DataType_DOUBLE: return make_ov_constant(ov::element::f64); -#ifdef ONNX_VERSION_116 case 
TensorProto_DataType::TensorProto_DataType_INT4: return make_ov_constant(ov::element::i4); -#endif case TensorProto_DataType::TensorProto_DataType_INT8: return make_ov_constant(ov::element::i8); case TensorProto_DataType::TensorProto_DataType_INT16: @@ -217,10 +209,8 @@ class Tensor { return make_ov_constant(ov::element::i32); case TensorProto_DataType::TensorProto_DataType_INT64: return make_ov_constant(ov::element::i64); -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_UINT4: return make_ov_constant(ov::element::u4); -#endif case TensorProto_DataType::TensorProto_DataType_UINT8: return make_ov_constant(ov::element::u8); case TensorProto_DataType::TensorProto_DataType_UINT16: @@ -238,17 +228,10 @@ class Tensor { case TensorProto_DataType::TensorProto_DataType_STRING: return make_ov_constant(ov::element::string); default: -#ifdef ONNX_VERSION_116 ONNX_UNSUPPORTED_DATA_TYPE( m_tensor_proto->data_type(), "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " "UINT4, UINT8, UINT16, UINT32, UINT64, STRING"); -#else - ONNX_UNSUPPORTED_DATA_TYPE( - m_tensor_proto->data_type(), - "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT8, INT16, INT32, INT64, " - "UINT8, UINT16, UINT32, UINT64, STRING"); -#endif } } diff --git a/src/frontends/onnx/frontend/src/editor.cpp b/src/frontends/onnx/frontend/src/editor.cpp index eaa7b31a61c03f..4ad576cd9d5b96 100644 --- a/src/frontends/onnx/frontend/src/editor.cpp +++ b/src/frontends/onnx/frontend/src/editor.cpp @@ -343,6 +343,14 @@ ONNXModelEditor::ONNXModelEditor(std::istream& model_stream, delete impl; }} {} +ONNXModelEditor::ONNXModelEditor(std::shared_ptr model_proto, frontend::ExtensionHolder extensions) + : m_model_path{""}, + m_mmap_cache{nullptr}, + m_extensions{std::move(extensions)}, + m_pimpl{new ONNXModelEditor::Impl{model_proto}, [](Impl* impl) { + delete impl; + }} {} + const std::string& ONNXModelEditor::model_path() const { return 
m_model_path; } diff --git a/src/frontends/onnx/frontend/src/editor.hpp b/src/frontends/onnx/frontend/src/editor.hpp index 81d2527c88b9cf..5c7619ed87dbf2 100644 --- a/src/frontends/onnx/frontend/src/editor.hpp +++ b/src/frontends/onnx/frontend/src/editor.hpp @@ -16,6 +16,8 @@ #include "openvino/op/constant.hpp" #include "utils/tensor_external_data.hpp" +using ::ONNX_NAMESPACE::ModelProto; + namespace ov { namespace frontend { namespace onnx { @@ -54,6 +56,13 @@ class ONNXModelEditor final { const bool enable_mmap = false, frontend::ExtensionHolder extensions = {}); + /// \brief Creates an editor from a ModelProto. The model_proto is + /// stored in m_model_proto member variable. + /// + /// \param model_proto A shared pointer on ModelProto object. + /// \param extensions Holder for custom extensions (like custom ops). + ONNXModelEditor(std::shared_ptr model_proto, frontend::ExtensionHolder extensions = {}); + /// \brief Modifies the in-memory representation of the model by setting /// custom input types for all inputs specified in the provided map. 
/// diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp index d4b83fee20db82..5ad28be3654422 100644 --- a/src/frontends/onnx/frontend/src/frontend.cpp +++ b/src/frontends/onnx/frontend/src/frontend.cpp @@ -8,7 +8,9 @@ #endif #include -#ifndef OV_PROTOBUF_ABSL_IS_USED +#ifdef OV_PROTOBUF_ABSL_IS_USED +# include +#else # include #endif @@ -32,6 +34,8 @@ using namespace ov; using namespace ov::frontend::onnx; using namespace ov::frontend::onnx::common; +using ::ONNX_NAMESPACE::ModelProto; +using ::ONNX_NAMESPACE::Version; ONNX_FRONTEND_C_API ov::frontend::FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; @@ -45,7 +49,9 @@ ONNX_FRONTEND_C_API void* get_front_end_data() { }; #ifndef OPENVINO_DEBUG_ENABLE // disable protobuf logging -# ifndef OV_PROTOBUF_ABSL_IS_USED +# ifdef OV_PROTOBUF_ABSL_IS_USED + absl::SetGlobalVLogLevel(0); +# else google::protobuf::SetLogHandler(nullptr); # endif #endif @@ -83,6 +89,17 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const #endif return std::make_shared(*stream, enable_mmap, m_extensions); } + // !!! Experimental feature, it may be changed or removed in the future !!! + if (variants[0].is()) { + void* model_proto_addr = reinterpret_cast(variants[0].as()); + FRONT_END_GENERAL_CHECK(model_proto_addr != 0, "Wrong address of a ModelProto object is passed"); + ModelProto* model_proto_ptr = static_cast(model_proto_addr); + FRONT_END_GENERAL_CHECK( + model_proto_ptr->has_ir_version() && model_proto_ptr->ir_version() < Version::IR_VERSION, + "A ModelProto object contains unsupported IR version"); + return std::make_shared(std::make_shared(*model_proto_ptr), m_extensions); + } + // !!! End of Experimental feature return nullptr; } @@ -213,7 +230,23 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { StreamRewinder rwd{*stream}; return is_valid_model(*stream); } - + // !!! 
Experimental feature, it may be changed or removed in the future !!! + if (variants[0].is()) { + void* model_proto_addr = reinterpret_cast(variants[0].as()); + if (model_proto_addr == 0) { + return false; + } + ModelProto* model_proto_ptr = static_cast(model_proto_addr); + try { + if (!model_proto_ptr->has_ir_version() || model_proto_ptr->ir_version() > Version::IR_VERSION) { + return false; + } + } catch (...) { + return false; + } + return true; + } + // !!! End of Experimental feature return false; } diff --git a/src/frontends/onnx/frontend/src/input_model.cpp b/src/frontends/onnx/frontend/src/input_model.cpp index 108690a6d645d9..87f1439eb18b38 100644 --- a/src/frontends/onnx/frontend/src/input_model.cpp +++ b/src/frontends/onnx/frontend/src/input_model.cpp @@ -37,6 +37,9 @@ InputModel::InputModel(std::istream& model_stream, : InputModel(model_stream, ov::util::wstring_to_string(path), enable_mmap, std::move(extensions)) {} #endif +InputModel::InputModel(std::shared_ptr model_proto, frontend::ExtensionHolder extensions) + : m_editor{std::make_shared(model_proto, std::move(extensions))} {} + std::vector InputModel::get_inputs() const { const auto& inputs = m_editor->model_inputs(); std::vector in_places; diff --git a/src/frontends/onnx/frontend/src/input_model.hpp b/src/frontends/onnx/frontend/src/input_model.hpp index 9bf44a5672fb28..246696621f1fd4 100644 --- a/src/frontends/onnx/frontend/src/input_model.hpp +++ b/src/frontends/onnx/frontend/src/input_model.hpp @@ -10,6 +10,8 @@ #include "openvino/frontend/extension/holder.hpp" +using ::ONNX_NAMESPACE::ModelProto; + namespace ov { namespace frontend { namespace onnx { @@ -33,6 +35,7 @@ class InputModel : public ov::frontend::InputModel { const bool enable_mmap = false, ExtensionHolder extensions = {}); #endif + InputModel(std::shared_ptr model_proto, ExtensionHolder extensions = {}); std::vector get_inputs() const override; std::vector get_outputs() const override; diff --git 
a/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp new file mode 100644 index 00000000000000..c4144be9b5ff44 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/com.microsoft/quick_gelu.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "core/operator_set.hpp" +#include "exceptions.hpp" +#include "openvino/frontend/exception.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/sigmoid.hpp" +#include "utils/common.hpp" + +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace onnx { +namespace com_microsoft { +namespace opset_1 { +ov::OutputVector quick_gelu(const ov::frontend::onnx::Node& node) { + // Original Documentation: + // https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QuickGelu + // Goal: Compute x * Sigmoid(alpha * x) + common::default_op_checks(node, 1); + + const auto inputs = node.get_ov_inputs(); + const auto& x = inputs[0]; + + // Constrain input type to float16, float, double (f64), bfloat16 + auto element_type = x.get_element_type(); + CHECK_VALID_NODE(node, + element_type == ov::element::f16 || element_type == ov::element::f32 || + element_type == ov::element::f64 || element_type == ov::element::bf16, + "Unsupported input x type, accepted FP16, FP32, FP64, BFP16 but got: ", + element_type); + + // Get attribute from node + const float alpha = node.get_attribute_value("alpha"); + + // Numpy broadcasting rule is automatically applied with mismatched shapes according to: + // https://docs.openvino.ai/2022.3/openvino_docs_ops_arithmetic_Multiply_1.html "Tensor with dimension of size 1 + // will be implicitly broadcasted to match the size of the second tensor." 
Convert alpha to tensor with size 1 + const auto alpha_tensor = std::make_shared(ov::element::f32, Shape{1}, alpha); + + auto alpha_x = std::make_shared(alpha_tensor, x); + auto sig_alpha_x = std::make_shared(alpha_x); + auto result = std::make_shared(x, sig_alpha_x); + + return {result}; +} // func end + +ONNX_OP("QuickGelu", OPSET_SINCE(1), com_microsoft::opset_1::quick_gelu, MICROSOFT_DOMAIN); + +} // namespace opset_1 +} // namespace com_microsoft +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index b09bc73467bc10..47fcc7af60bf61 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -18,6 +18,7 @@ #include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "transformations/rt_info/disable_constant_folding.hpp" #include "utils/common.hpp" #include "utils/reshape.hpp" using namespace ov::op; @@ -221,19 +222,8 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { FRONT_END_GENERAL_CHECK(src_x.get_partial_shape().is_static(), "DequantizeLinear cannot operate with dynamic shapes of input X"); - const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); - - if (inputs.size() > 2) { - zp = inputs[2]; - if (zp.get_element_type() != scale.get_element_type()) { - zp = std::make_shared(zp, scale); - } - zp = std::make_shared(zp, unsqueezed_axes); - } - const auto axis = node.get_attribute_value("axis", 1); const auto block_size = static_cast(node.get_attribute_value("block_size", 0)); - const auto scale_type = scale.get_element_type(); FRONT_END_GENERAL_CHECK(axis == 0, "Axis != 0 isn't supported"); FRONT_END_GENERAL_CHECK(block_size > 0, "block_size must be greater than zero"); @@ -241,16 +231,31 @@ ov::OutputVector 
dequantize_linear(const ov::frontend::onnx::Node& node) { src_x.get_shape()[0] % block_size == 0, "DequantizeLinear doesn't support case when first dimension of X cannot be divided by block_size"); - const auto& x = src_x.get_element_type() == scale_type ? src_x : std::make_shared(src_x, scale); + ov::Output broadcastable_x = op::util::reshape( + src_x, + Shape{static_cast(src_x.get_shape()[0]) / block_size, block_size, src_x.get_shape()[1]}); + + const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); + + const auto scale_type = scale.get_element_type(); + if (inputs.size() > 2) { + zp = inputs[2]; + if (zp.get_element_type() != scale.get_element_type()) { + zp = std::make_shared(zp, scale_type); + disable_constant_folding(zp.get_node_shared_ptr()); + } + zp = std::make_shared(zp, unsqueezed_axes); + } + + const auto& x = src_x.get_element_type() == scale_type ? broadcastable_x + : std::make_shared(broadcastable_x, scale_type); // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] - ov::Output broadcastable_x = - op::util::reshape(x, Shape{static_cast(x.get_shape()[0]) / block_size, block_size, x.get_shape()[1]}); // Adding additional dimension for broadcasting scale = std::make_shared(scale, unsqueezed_axes); if (zp.get_node_shared_ptr()) { - broadcastable_x = std::make_shared(broadcastable_x, zp); + broadcastable_x = std::make_shared(x, zp); } const auto& scaled_x = std::make_shared(broadcastable_x, scale); diff --git a/src/frontends/onnx/frontend/src/op/lstm.cpp b/src/frontends/onnx/frontend/src/op/lstm.cpp index 6b934db7d13071..1c0efbbe6b9454 100644 --- a/src/frontends/onnx/frontend/src/op/lstm.cpp +++ b/src/frontends/onnx/frontend/src/op/lstm.cpp @@ -211,41 +211,19 @@ ov::OutputVector lstm(const ov::frontend::onnx::Node& node) { LSTMAttributes attributes{node}; std::shared_ptr lstm_sequence; - if ((input_map.at(LSTMInput::LSTM_INPUT_P).get_names() != 
std::unordered_set({"P_blank"})) || - (attributes.m_input_forget == true)) { - OPENVINO_SUPPRESS_DEPRECATED_START - lstm_sequence = std::make_shared(input_map.at(LSTMInput::LSTM_INPUT_X), - input_map.at(LSTMInput::LSTM_INPUT_INIT_H), - input_map.at(LSTMInput::LSTM_INPUT_INIT_C), - input_map.at(LSTMInput::LSTM_INPUT_SEQ_LENGTHS), - input_map.at(LSTMInput::LSTM_INPUT_W), - input_map.at(LSTMInput::LSTM_INPUT_R), - input_map.at(LSTMInput::LSTM_INPUT_B), - input_map.at(LSTMInput::LSTM_INPUT_P), - attributes.m_hidden_size, - attributes.m_direction, - ov::op::LSTMWeightsFormat::FICO, - attributes.m_activation_alpha, - attributes.m_activation_beta, - attributes.m_activations, - attributes.m_clip_threshold, - attributes.m_input_forget); - OPENVINO_SUPPRESS_DEPRECATED_END - } else { - lstm_sequence = std::make_shared(input_map.at(LSTMInput::LSTM_INPUT_X), - input_map.at(LSTMInput::LSTM_INPUT_INIT_H), - input_map.at(LSTMInput::LSTM_INPUT_INIT_C), - input_map.at(LSTMInput::LSTM_INPUT_SEQ_LENGTHS), - input_map.at(LSTMInput::LSTM_INPUT_W), - input_map.at(LSTMInput::LSTM_INPUT_R), - input_map.at(LSTMInput::LSTM_INPUT_B), - attributes.m_hidden_size, - attributes.m_direction, - attributes.m_activation_alpha, - attributes.m_activation_beta, - attributes.m_activations, - attributes.m_clip_threshold); - } + lstm_sequence = std::make_shared(input_map.at(LSTMInput::LSTM_INPUT_X), + input_map.at(LSTMInput::LSTM_INPUT_INIT_H), + input_map.at(LSTMInput::LSTM_INPUT_INIT_C), + input_map.at(LSTMInput::LSTM_INPUT_SEQ_LENGTHS), + input_map.at(LSTMInput::LSTM_INPUT_W), + input_map.at(LSTMInput::LSTM_INPUT_R), + input_map.at(LSTMInput::LSTM_INPUT_B), + attributes.m_hidden_size, + attributes.m_direction, + attributes.m_activation_alpha, + attributes.m_activation_beta, + attributes.m_activations, + attributes.m_clip_threshold); const auto Y = lstm_sequence->output(0); const auto Y_h = lstm_sequence->output(1); diff --git a/src/frontends/onnx/frontend/src/utils/common.cpp 
b/src/frontends/onnx/frontend/src/utils/common.cpp index 66fdcf1c7830c7..46c7be75bbdd66 100644 --- a/src/frontends/onnx/frontend/src/utils/common.cpp +++ b/src/frontends/onnx/frontend/src/utils/common.cpp @@ -42,10 +42,8 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { return ov::element::f16; case TensorProto_DataType::TensorProto_DataType_FLOAT: return ov::element::f32; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_INT4: return ov::element::i4; -#endif case TensorProto_DataType::TensorProto_DataType_INT8: return ov::element::i8; case TensorProto_DataType::TensorProto_DataType_INT16: @@ -54,10 +52,8 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { return ov::element::i32; case TensorProto_DataType::TensorProto_DataType_INT64: return ov::element::i64; -#ifdef ONNX_VERSION_116 case TensorProto_DataType::TensorProto_DataType_UINT4: return ov::element::u4; -#endif case TensorProto_DataType::TensorProto_DataType_UINT8: return ov::element::u8; case TensorProto_DataType::TensorProto_DataType_UINT16: @@ -77,15 +73,9 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { case TensorProto_DataType::TensorProto_DataType_STRING: return ov::element::string; } -#ifdef ONNX_VERSION_116 ONNX_UNSUPPORTED_DATA_TYPE(onnx_type, "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, " "INT32, INT64, UINT4, UINT8, UINT16, UINT32, UINT64, STRING, UNDEFINED"); -#else - ONNX_UNSUPPORTED_DATA_TYPE(onnx_type, - "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT8, INT16, " - "INT32, INT64, UINT8, UINT16, UINT32, UINT64, STRING, UNDEFINED"); -#endif } void default_op_checks(const Node& node, size_t min_inputs_size) { diff --git a/src/frontends/onnx/onnx_common/CMakeLists.txt b/src/frontends/onnx/onnx_common/CMakeLists.txt index a743c5ac40a0dd..d63bce4083087c 100644 --- a/src/frontends/onnx/onnx_common/CMakeLists.txt +++ 
b/src/frontends/onnx/onnx_common/CMakeLists.txt @@ -35,18 +35,3 @@ ov_link_system_libraries(${TARGET_NAME} PUBLIC onnx_proto onnx) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) - -# Temporary solution until vcpkg doesn't have fresh ONNX, -# trying determine used version of ONNX to enable modern functionality -find_package(ONNX 1.16.0 QUIET COMPONENTS onnx onnx_proto NO_MODULE) -if(ONNX_FOUND) - target_compile_definitions(${TARGET_NAME} PUBLIC ONNX_VERSION_116) -else() - if(EXISTS "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER") - file(READ "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER" ONNX_VERSION) - string(STRIP "${ONNX_VERSION}" ONNX_VERSION) - if((ONNX_VERSION GREATER "1.16.0") OR (ONNX_VERSION EQUAL "1.16.0")) - target_compile_definitions(${TARGET_NAME} PUBLIC ONNX_VERSION_116) - endif() - endif() -endif() diff --git a/src/frontends/onnx/onnx_common/src/utils.cpp b/src/frontends/onnx/onnx_common/src/utils.cpp index 6ec409c5671458..b83dea1b4cfd99 100644 --- a/src/frontends/onnx/onnx_common/src/utils.cpp +++ b/src/frontends/onnx/onnx_common/src/utils.cpp @@ -30,10 +30,8 @@ size_t get_onnx_data_size(int32_t onnx_type) { return sizeof(ov::float8_e4m3); case TensorProto_DataType_FLOAT8E5M2: return sizeof(ov::float8_e5m2); -#ifdef ONNX_VERSION_116 case TensorProto_DataType_INT4: return sizeof(int8_t); -#endif case TensorProto_DataType_INT8: return sizeof(int8_t); case TensorProto_DataType_INT16: @@ -42,10 +40,8 @@ size_t get_onnx_data_size(int32_t onnx_type) { return sizeof(int32_t); case TensorProto_DataType_INT64: return sizeof(int64_t); -#ifdef ONNX_VERSION_116 case TensorProto_DataType_UINT4: return sizeof(uint8_t); -#endif case TensorProto_DataType_UINT8: return sizeof(uint8_t); case TensorProto_DataType_UINT16: @@ -66,16 +62,12 @@ const std::map OV_2_ONNX_TYPES = { {ov::element::Type_t::f16, TensorProto_DataType::TensorProto_DataType_FLOAT16}, 
{ov::element::Type_t::f32, TensorProto_DataType::TensorProto_DataType_FLOAT}, {ov::element::Type_t::f64, TensorProto_DataType::TensorProto_DataType_DOUBLE}, -#ifdef ONNX_VERSION_116 {ov::element::Type_t::i4, TensorProto_DataType::TensorProto_DataType_INT4}, -#endif {ov::element::Type_t::i8, TensorProto_DataType::TensorProto_DataType_INT8}, {ov::element::Type_t::i16, TensorProto_DataType::TensorProto_DataType_INT16}, {ov::element::Type_t::i32, TensorProto_DataType::TensorProto_DataType_INT32}, {ov::element::Type_t::i64, TensorProto_DataType::TensorProto_DataType_INT64}, -#ifdef ONNX_VERSION_116 {ov::element::Type_t::u4, TensorProto_DataType::TensorProto_DataType_UINT4}, -#endif {ov::element::Type_t::u8, TensorProto_DataType::TensorProto_DataType_UINT8}, {ov::element::Type_t::u16, TensorProto_DataType::TensorProto_DataType_UINT16}, {ov::element::Type_t::u32, TensorProto_DataType::TensorProto_DataType_UINT32}, diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index 599c7c43b05395..f508fdb4c1a903 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -134,21 +134,6 @@ target_compile_definitions(ov_onnx_frontend_tests set(ONNX_OPSET_VERSION 17 CACHE INTERNAL "Supported version of ONNX operator set") target_compile_definitions(ov_onnx_frontend_tests PRIVATE ONNX_OPSET_VERSION=${ONNX_OPSET_VERSION}) -# Temporary solution until vcpkg doesn't have fresh ONNX, -# trying determine used version of ONNX to enable modern functionality -find_package(ONNX 1.16.0 QUIET COMPONENTS onnx onnx_proto NO_MODULE) -if(ONNX_FOUND) - target_compile_definitions(ov_onnx_frontend_tests PRIVATE ONNX_VERSION_116) -else() - if(EXISTS "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER") - file(READ "${CMAKE_SOURCE_DIR}/thirdparty/onnx/onnx/VERSION_NUMBER" ONNX_VERSION) - string(STRIP "${ONNX_VERSION}" ONNX_VERSION) - if((ONNX_VERSION GREATER "1.16.0") OR (ONNX_VERSION EQUAL "1.16.0")) - 
target_compile_definitions(ov_onnx_frontend_tests PRIVATE ONNX_VERSION_116) - endif() - endif() -endif() - if(ONNX_TESTS_DEPENDENCIES) add_dependencies(ov_onnx_frontend_tests ${ONNX_TESTS_DEPENDENCIES}) endif() @@ -182,8 +167,11 @@ add_custom_command(TARGET ov_onnx_frontend_tests POST_BUILD ${custom_commands} COMMENT "Copy test manifest files to ${TEST_MODEL_ZOO}/onnx") -# process models +# Process models add_dependencies(ov_onnx_frontend_tests test_model_zoo) +# Working with ModelProto +ov_link_system_libraries(ov_onnx_frontend_tests PUBLIC onnx_proto onnx) + add_subdirectory(standalone_build) add_dependencies(ov_onnx_frontend_tests onnx_fe_standalone_build_test) diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index fb29faa38b46ec..ef8cebfa361e3f 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -182,3 +182,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_139936 = xfail_test(reason = "MaxPool accuracy fails") xfail_issue_139937 = xfail_test(reason = "GroupNorm, QLinearMatMul, DequantizeLinear translation failed") xfail_issue_139938 = xfail_test(reason = "QLinearMatMul accuracy fails") + +# ONNX 1.17 +skip_issue_119896 = pytest.mark.skip(reason="Unsupported element type: FLOAT8") diff --git a/src/frontends/onnx/tests/ci_utils/onnxruntime/version b/src/frontends/onnx/tests/ci_utils/onnxruntime/version index 85c399f04d78df..774958e19512de 100644 --- a/src/frontends/onnx/tests/ci_utils/onnxruntime/version +++ b/src/frontends/onnx/tests/ci_utils/onnxruntime/version @@ -1 +1 @@ -rel-1.18.1 +rel-1.19.2 diff --git a/src/frontends/onnx/tests/load_from.cpp b/src/frontends/onnx/tests/load_from.cpp index 617f4a917567d5..547937ac52171f 100644 --- a/src/frontends/onnx/tests/load_from.cpp +++ b/src/frontends/onnx/tests/load_from.cpp @@ -4,6 +4,7 @@ #include "load_from.hpp" #include +#include #include @@ -61,3 +62,68 @@ 
INSTANTIATE_TEST_SUITE_P(ONNXLoadTest, FrontEndLoadFromTest, ::testing::Values(getTestData()), FrontEndLoadFromTest::getTestCaseName); + +// !!! Experimental feature, it may be changed or removed in the future !!! +using ::ONNX_NAMESPACE::ModelProto; +using ::ONNX_NAMESPACE::Version; + +TEST_P(FrontEndLoadFromTest, testLoadFromModelProtoUint64) { + const auto path = + ov::util::path_join({ov::test::utils::getExecutableDirectory(), TEST_ONNX_MODELS_DIRNAME, "abs.onnx"}); + std::ifstream ifs(path, std::ios::in | std::ios::binary); + ASSERT_TRUE(ifs.is_open()) << "Could not open an ifstream for the model path: " << path; + std::vector frontends; + FrontEnd::Ptr fe; + + { + auto model_proto = std::make_shared(); + ASSERT_TRUE(model_proto->ParseFromIstream(&ifs)) << "Could not parse ModelProto from file: " << path; + + uint64_t model_proto_ptr = reinterpret_cast(model_proto.get()); + + ASSERT_NO_THROW(m_frontEnd = m_fem.load_by_model(model_proto_ptr)) + << "Could not create the ONNX FE using a pointer on ModelProto object as uint64_t"; + ASSERT_NE(m_frontEnd, nullptr); + ASSERT_NO_THROW(m_inputModel = m_frontEnd->load(model_proto_ptr)) << "Could not load the model"; + ASSERT_NE(m_inputModel, nullptr); + } + + std::shared_ptr model; + ASSERT_NO_THROW(model = m_frontEnd->convert(m_inputModel)) << "Could not convert the model to OV representation"; + ASSERT_NE(model, nullptr); + + ASSERT_TRUE(model->get_ordered_ops().size() > 0); +} + +TEST_P(FrontEndLoadFromTest, testLoadFromModelProtoUint64_Negative) { + const auto path = + ov::util::path_join({ov::test::utils::getExecutableDirectory(), TEST_ONNX_MODELS_DIRNAME, "abs.onnx"}); + std::ifstream ifs(path, std::ios::in | std::ios::binary); + ASSERT_TRUE(ifs.is_open()) << "Could not open an ifstream for the model path: " << path; + std::vector frontends; + FrontEnd::Ptr fe; + + auto model_proto = std::make_shared(); + ASSERT_TRUE(model_proto->ParseFromIstream(&ifs)) << "Could not parse ModelProto from file: " << path; + + 
uint64_t model_proto_ptr = reinterpret_cast(model_proto.get()); + + ASSERT_NO_THROW(m_frontEnd = m_fem.load_by_model(model_proto_ptr)) + << "Could not create the ONNX FE using a pointer on ModelProto object as uint64_t"; + ASSERT_NE(m_frontEnd, nullptr); + // Should say unsupported if an address is 0 + ASSERT_FALSE(m_frontEnd->supported(static_cast(0))); + // Should throw an ov::Exception if address is 0 + OV_EXPECT_THROW(m_inputModel = m_frontEnd->load(static_cast(0)), + ov::Exception, + testing::HasSubstr("Wrong address")); + + model_proto->set_ir_version(Version::IR_VERSION + 1); + // Should say unsupported if ModelProto has IR_VERSION higher than supported + ASSERT_FALSE(m_frontEnd->supported(model_proto_ptr)); + // Should throw an ov::Exception if address is 0 + OV_EXPECT_THROW(m_inputModel = m_frontEnd->load(model_proto_ptr), + ov::Exception, + testing::HasSubstr("unsupported IR version")); +} +// !!! End of Experimental feature !!! diff --git a/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt b/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt new file mode 100644 index 00000000000000..4fb110fd485833 --- /dev/null +++ b/src/frontends/onnx/tests/models/com.microsoft/quick_gelu.prototxt @@ -0,0 +1,52 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + name: "test_quick_gelu" + node { + input: "X" + output: "Y" + op_type: "QuickGelu" + attribute { + name: "alpha" + f: 0.9974269270896912 + type: FLOAT + } + domain: "com.microsoft" + } + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "com.microsoft" + version: 1 +} \ No newline at end of file diff --git a/src/frontends/onnx/tests/models/lstm_bdir_short_input_seq.prototxt 
b/src/frontends/onnx/tests/models/lstm_bdir_short_input_seq.prototxt deleted file mode 100644 index 57b723e44b6914..00000000000000 --- a/src/frontends/onnx/tests/models/lstm_bdir_short_input_seq.prototxt +++ /dev/null @@ -1,235 +0,0 @@ -ir_version: 5 -graph { - node { - input: "X" - input: "W" - input: "R" - input: "B" - input: "sequence_lens" - input: "initial_h" - input: "initial_c" - input: "P" - output: "Y" - output: "Y_h" - output: "" - name: "node1" - op_type: "LSTM" - attribute { - name: "direction" - s: "bidirectional" - type: STRING - } - attribute { - name: "input_forget" - i: 0 - type: INT - } - attribute { - name: "activations" - strings: "sigmoid" - strings: "tanh" - strings: "tanh" - strings: "sigmoid" - strings: "tanh" - strings: "tanh" - type: STRINGS - } - attribute { - name: "hidden_size" - i: 2 - type: INT - } - attribute { - name: "clip" - f: 9999 - type: FLOAT - } - doc_string: "LSTM" - domain: "" - } - input { - name: "X" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "W" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "R" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "B" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 16 - } - } - } - } - } - input { - name: "sequence_lens" - type { - tensor_type { - elem_type: 6 - shape { - dim { - dim_value: 1 - } - } - } - } - } - input { - name: "initial_h" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "initial_c" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - 
dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "P" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 6 - } - } - } - } - } - output { - name: "Y" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - output { - name: "Y_h" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } -} -opset_import { - domain: "" - version: 7 -} diff --git a/src/frontends/onnx/tests/models/lstm_fwd_with_clip_peepholes.prototxt b/src/frontends/onnx/tests/models/lstm_fwd_with_clip_peepholes.prototxt deleted file mode 100644 index 2a218f089240a5..00000000000000 --- a/src/frontends/onnx/tests/models/lstm_fwd_with_clip_peepholes.prototxt +++ /dev/null @@ -1,191 +0,0 @@ -ir_version: 4 -producer_name: "OpenVINO ONNX Frontend" -graph { - node { - input: "X" - input: "W" - input: "R" - input: "B" - input: "" - input: "" - input: "" - input: "P" - output: "Y" - output: "Y_h" - output: "Y_c" - op_type: "LSTM" - attribute { - name: "clip" - f: 0.10000000149011612 - type: FLOAT - } - attribute { - name: "direction" - s: "forward" - type: STRING - } - attribute { - name: "hidden_size" - i: 2 - type: INT - } - attribute { - name: "input_forget" - i: 0 - type: INT - } - } - name: "compute_graph" - input { - name: "X" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "W" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - name: "R" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 8 - } - dim { - dim_value: 2 - } - } - } - } - } - input { - 
name: "B" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 16 - } - } - } - } - } - input { - name: "P" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 6 - } - } - } - } - } - output { - name: "Y" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 2 - } - dim { - dim_value: 1 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - output { - name: "Y_h" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } - output { - name: "Y_c" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - } - } - } - } -} -opset_import { - version: 7 -} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index bc27a759d415a0..c57cb2babc569b 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -159,7 +159,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_bool_init_raw) { test_case.run(); } -#ifdef ONNX_VERSION_116 OPENVINO_TEST(${BACKEND_NAME}, onnx_int4_const) { auto model = convert_model("int4_const.onnx"); @@ -195,7 +194,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_uint4_input) { test_case.run(); } -#endif OPENVINO_TEST(${BACKEND_NAME}, onnx_model_add_abc_initializers) { auto model = convert_model("add_abc_initializers.onnx"); diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index da8189926a4546..900fc025d8d9ab 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -1330,3 +1330,29 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_matmulnbits_3x17) { } test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, 
onnx_com_microsoft_quickgelu) { + const auto model = convert_model("com.microsoft/quick_gelu.onnx"); + auto test_case = ov::test::TestCase(model, s_device); + + const std::vector input_X{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + const std::vector output{0.7305524f, + 1.7605114f, + 2.8566725f, + 3.9273243f, + 4.9661055f, + 5.984934f, + 6.9935064f, + 7.997261f, + 8.998864f, + 9.999535f}; + + test_case.add_input(Shape{2, 5}, input_X); + test_case.add_expected_output(Shape{2, 5}, output); + + if (std::string("${BACKEND_NAME}") == std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(0.0001f); + } else { + test_case.run(); + } +} diff --git a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp index 5979466849d5e6..b61fd1d36f99e0 100644 --- a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp @@ -202,72 +202,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_mixed_seq_const) { test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } -OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_peepholes) { - auto model = convert_model("lstm_fwd_with_clip_peepholes.onnx"); - - auto test_case = ov::test::TestCase(model, s_device); - test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); // X - test_case.add_input({-0.494659f, // W - 0.0453352f, - -0.487793f, - 0.417264f, - -0.0175329f, - 0.489074f, - -0.446013f, - 0.414029f, - -0.0091708f, - -0.255364f, - -0.106952f, - -0.266717f, - -0.0888852f, - -0.428709f, - -0.283349f, - 0.208792f}); // W - test_case.add_input({0.146626f, - -0.0620289f, - -0.0815302f, - 0.100482f, - -0.219535f, - -0.306635f, - -0.28515f, - -0.314112f, - -0.228172f, - 0.405972f, - 0.31576f, - 0.281487f, - -0.394864f, - 0.42111f, - -0.386624f, - -0.390225f}); // R - - test_case.add_input({0.381619f, - 0.0323954f, - -0.14449f, - 0.420804f, - -0.258721f, - 0.45056f, - -0.250755f, - 0.0967895f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 
0.0f, - 0.0f}); // B - test_case.add_input({0.2345f, 0.5235f, 0.4378f, 0.3475f, 0.8927f, 0.3456f}); // P - - test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.02280854f, 0.02744377f, -0.03516197f, 0.03875681f}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.03516197f, 0.03875681f}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.07415761f, 0.07395997f}); // Y_c_data - - // We have to enlarge tolerance bits to 3 - it's only one bit more than default value. - // The discrepancies may occur at most on 7th decimal position. - test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); -} - OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_mixed_seq) { auto model = convert_model("lstm_fwd_mixed_seq.onnx"); @@ -397,50 +331,6 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_large_batch_no_clip) { test_case.run(); } -OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_bdir_short_input_seq_peepholes) { - auto model = convert_model("lstm_bdir_short_input_seq.onnx"); - - auto test_case = ov::test::TestCase(model, s_device); - - // X - test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); - // W - test_case.add_input( - {-0.494659f, 0.0453352f, -0.487793f, 0.417264f, -0.0175329f, 0.489074f, -0.446013f, 0.414029f, - -0.0091708f, -0.255364f, -0.106952f, -0.266717f, -0.0888852f, -0.428709f, -0.283349f, 0.208792f, - -0.494659f, 0.0453352f, -0.487793f, 0.417264f, -0.0175329f, 0.489074f, -0.446013f, 0.414029f, - -0.0091708f, -0.255364f, -0.106952f, -0.266717f, -0.0888852f, -0.428709f, -0.283349f, 0.208792f}); - // R - test_case.add_input({0.146626f, -0.0620289f, -0.0815302f, 0.100482f, -0.219535f, -0.306635f, -0.28515f, - -0.314112f, -0.228172f, 0.405972f, 0.31576f, 0.281487f, -0.394864f, 0.42111f, - -0.386624f, -0.390225f, 0.146626f, -0.0620289f, -0.0815302f, 0.100482f, -0.219535f, - -0.306635f, -0.28515f, -0.314112f, -0.228172f, 0.405972f, 0.31576f, 0.281487f, - -0.394864f, 0.42111f, -0.386624f, -0.390225f}); - // B - 
test_case.add_input({0.381619f, 0.0323954f, -0.14449f, 0.420804f, -0.258721f, 0.45056f, -0.250755f, - 0.0967895f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.381619f, 0.0323954f, -0.14449f, 0.420804f, -0.258721f, - 0.45056f, -0.250755f, 0.0967895f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f}); - // sequence_lens - test_case.add_input({1}); - // initial_h - test_case.add_input({0.0f, 0.0f, -0.0306872f, 0.028035f}); - // initial_c - test_case.add_input({0.0f, 0.0f, -0.07243599f, 0.0467052f}); - // P - test_case.add_input( - {0.2345f, 0.5235f, 0.4378f, 0.3475f, 0.8927f, 0.3456f, 0.2345f, 0.5235f, 0.4378f, 0.3475f, 0.8927f, 0.3456f}); - - // Y - test_case.add_expected_output(Shape{2, 2, 1, 2}, - {-0.0251062f, 0.0561262f, -0.0318928f, 0.0762679f, 0.0f, 0.0f, 0.0f, 0.0f}); - // Y_h - test_case.add_expected_output(Shape{2, 1, 2}, {-0.0251062f, 0.0561262f, -0.0318928f, 0.0762679f}); - - test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); -} - OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_mixed_seq_reverse) { auto model = convert_model("lstm_mixed_seq_reverse.onnx"); diff --git a/src/frontends/onnx/tests/runtime/ie/unit_test.manifest b/src/frontends/onnx/tests/runtime/ie/unit_test.manifest index b2e87a2588ec65..3eadb698ca0a03 100644 --- a/src/frontends/onnx/tests/runtime/ie/unit_test.manifest +++ b/src/frontends/onnx/tests/runtime/ie/unit_test.manifest @@ -121,9 +121,6 @@ onnx_model_lp_norm_default_dynamic onnx_instance_normalization_dynamic # Legacy tests with unsupported features from opset4 LSTM/GRU/RNN -# Peepholes input unsupported -onnx_model_lstm_fwd_with_clip_peepholes -onnx_model_lstm_bdir_short_input_seq_peepholes # Activation function hardsigmoid is not supported onnx_model_gru_fwd_activations_relu_hardsigmoid onnx_model_lstm_fwd_hardsigmoid_activation diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index ca4f90ed5d94be..39b9788d720af3 100644 --- 
a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -57,6 +57,7 @@ xfail_issue_113506, skip_dynamic_model, xfail_issue_119896, + skip_issue_119896, xfail_issue_119900, xfail_issue_119903, xfail_issue_119906, @@ -246,7 +247,11 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_maxunpool_export_with_output_shape_cpu", "OnnxBackendNodeModelTest.test_maxunpool_export_without_output_shape_cpu", ), - (xfail_issue_38724, "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu"), + ( + xfail_issue_38724, + "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu", + "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_extrapolation_value_cpu" + ), ( xfail_issue_33606, "OnnxBackendNodeModelTest.test_det_2d_cpu", @@ -454,6 +459,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_axes_2_3_cpu", "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_axes_3_2_cpu", "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_not_larger_cpu", + "OnnxBackendNodeModelTest.test_resize_upsample_sizes_nearest_not_smaller_cpu", ), ( xfail_issue_99970, @@ -520,6 +526,13 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_dequantizelinear_e4m3fn_float16_cpu", "OnnxBackendNodeModelTest.test_dequantizelinear_e4m3fn_zero_point_cpu", ), + ( + skip_issue_119896, + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN_cpu", + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2_cpu", + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FN_cpu", + "OnnxBackendNodeModelTest.test_cast_no_saturate_FLOAT_to_FLOAT8E5M2_cpu", + ), ( xfail_issue_119900, "OnnxBackendNodeModelTest.test_resize_downsample_scales_linear_half_pixel_symmetric_cpu", @@ -626,6 +639,7 @@ def expect_fail(test_case_path, xfail): # 
type: (str) -> None skip_misalignment, "OnnxBackendNodeModelTest.test_gelu_default_2_expanded_cpu", "OnnxBackendNodeModelTest.test_reduce_log_sum_exp_empty_set_expanded_cpu", + "OnnxBackendNodeModelTest.test_reduce_max_empty_set_cpu", "OnnxBackendNodeModelTest.test_group_normalization_epsilon_cpu", "OnnxBackendNodeModelTest.test_group_normalization_example_cpu", "OnnxBackendNodeModelTest.test_qlinearmatmul_3D_int8_float16_cpu", diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index c6febe08437b5d..163b4d894cb766 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -10,7 +10,9 @@ #endif #include -#ifndef OV_PROTOBUF_ABSL_IS_USED +#ifdef OV_PROTOBUF_ABSL_IS_USED +# include +#else # include #endif @@ -594,7 +596,9 @@ PADDLE_C_API void* get_front_end_data() { #ifndef OPENVINO_DEBUG_ENABLE // disable protobuf logging -# ifndef OV_PROTOBUF_ABSL_IS_USED +# ifdef OV_PROTOBUF_ABSL_IS_USED + absl::SetGlobalVLogLevel(0); +# else google::protobuf::SetLogHandler(nullptr); # endif #endif diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 5906043e51262d..2b0ab6db9d3a09 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -320,6 +320,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { model->get_rt_info().erase("symmetric_quantization"); } manager.register_pass(sym); + manager.register_pass(); manager.register_pass(); manager.run_passes(model); diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp index 565b0cdbd39385..6a8c370ef2b410 100644 --- a/src/frontends/pytorch/src/node_context.cpp +++ b/src/frontends/pytorch/src/node_context.cpp @@ -4,6 +4,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" +#include "helper_ops/internal_op.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/frontend/exception.hpp" #include 
"openvino/frontend/pytorch/decoder.hpp" @@ -151,13 +152,26 @@ OutputVector NodeContext::inputs() const { if (input == 0) { // Case when input can be inlined (possible only for fx decoder) if (m_decoder->is_input_inlined(i)) { - auto inlined_input = m_decoder->inlined_input(i); - FRONT_END_GENERAL_CHECK(inlined_input.size() == 1, - "Incorrect inlined input with index: ", - i, - " for operation ", - get_op_type()); - res.push_back(inlined_input[0]); + if (input_is_none(i)) { + // some operations like aten.index.Tensor can have None inputs + auto dummy_decoder = std::make_shared("torch::None", 1); + auto fw_node = std::make_shared(dummy_decoder, OutputVector{}); + auto attrs = fw_node->get_attrs(); + attrs["none_value"] = ""; + attrs[PtFrameworkNode::failed_conversion_key] = + "None constant cannot be converted to OpenVINO opset and should be removed by consuming " + "operation."; + fw_node->set_attrs(attrs); + res.push_back(fw_node->output(0)); + } else { + auto inlined_input = m_decoder->inlined_input(i); + FRONT_END_GENERAL_CHECK(inlined_input.size() == 1, + "Incorrect inlined input with index: ", + i, + " for operation ", + get_op_type()); + res.push_back(inlined_input[0]); + } continue; } } diff --git a/src/frontends/pytorch/src/op/atan2.cpp b/src/frontends/pytorch/src/op/atan2.cpp new file mode 100644 index 00000000000000..341f1c201eae56 --- /dev/null +++ b/src/frontends/pytorch/src/op/atan2.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#define _USE_MATH_DEFINES + +#include + +#include + +#include "openvino/core/type/element_type.hpp" +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/atan.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/greater.hpp" +#include "openvino/op/greater_eq.hpp" +#include 
"openvino/op/less.hpp" +#include "openvino/op/logical_and.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_atan2(const NodeContext& context) { + // atan2(input, other, *) → Tensor + num_inputs_check(context, 2, 2); + Output lhs; + Output rhs; + + std::tie(lhs, rhs) = get_inputs_with_promoted_types(context, 0, 1); + + auto div = context.mark_node(std::make_shared(lhs, rhs)); + + auto atan = context.mark_node(std::make_shared(div)); + + // create some constants to adjust result according to quadrant. + auto zero = context.mark_node(v0::Constant::create(ov::element::i32, Shape{}, {0})); + auto pi = context.mark_node(v0::Constant::create(ov::element::f64, Shape{}, {M_PI})); + auto half_pi = context.mark_node(v0::Constant::create(ov::element::f64, Shape{}, {M_PI_2})); + auto neg_half_pi = context.mark_node(v0::Constant::create(ov::element::f64, Shape{}, {-M_PI_2})); + + zero = context.mark_node(std::make_shared(zero, rhs)); + pi = context.mark_node(std::make_shared(pi, rhs)); + half_pi = context.mark_node(std::make_shared(half_pi, rhs)); + neg_half_pi = context.mark_node(std::make_shared(neg_half_pi, rhs)); + + // x > 0, no adjustment needed + auto x_greater_than_zero = context.mark_node(std::make_shared(rhs, zero)); + + // x < 0 and y >= 0, need to plus pi + auto y_greater_equal_zero = context.mark_node(std::make_shared(lhs, zero)); + auto x_less_than_zero = context.mark_node(std::make_shared(rhs, zero)); + auto add_pi_condition = context.mark_node(std::make_shared(x_less_than_zero, y_greater_equal_zero)); + + // x < 0 and y < 0, need to minus pi + auto y_less_than_zero = std::make_shared(lhs, zero); + auto subtract_pi_condition = + context.mark_node(std::make_shared(x_less_than_zero, y_less_than_zero)); + + // x = 0 
and y > 0, pi/2 + auto x_equal_zero = std::make_shared(rhs, zero); + auto y_greater_than_zero = std::make_shared(lhs, zero); + auto half_pi_condition = context.mark_node(std::make_shared(x_equal_zero, y_greater_than_zero)); + + // x = 0 and y < 0, -pi/2 + auto neg_half_pi_condition = context.mark_node(std::make_shared(x_equal_zero, y_less_than_zero)); + + auto special_case_condition = + context.mark_node(std::make_shared(half_pi_condition, neg_half_pi_condition)); + + // do adjustment + auto atan_plus_pi = context.mark_node(std::make_shared(atan, pi)); + auto atan_minus_pi = context.mark_node(std::make_shared(atan, pi)); + + // select result + auto ajusted_case = context.mark_node(std::make_shared(add_pi_condition, atan_plus_pi, atan_minus_pi)); + auto special_case = context.mark_node(std::make_shared(half_pi_condition, half_pi, neg_half_pi)); + auto adjusted_atan = context.mark_node(std::make_shared(x_greater_than_zero, atan, ajusted_case)); + auto result = context.mark_node(std::make_shared(special_case_condition, special_case, adjusted_atan)); + + return {result}; +} + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp index 03c32259b45091..d8223b04bfe690 100644 --- a/src/frontends/pytorch/src/op/avg_poolnd.cpp +++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp @@ -3,12 +3,17 @@ // #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/avg_pool.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/pad.hpp" -#include "openvino/op/subtract.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/unsqueeze.hpp" #include "utils.hpp" namespace ov { @@ -17,10 +22,31 @@ namespace pytorch { 
namespace op { using namespace ov::op; - -OutputVector translate_avg_poolnd(const NodeContext& context) { +OutputVector translate_avg_pool_base(const NodeContext& context, int dims) { num_inputs_check(context, 2, 7); auto input = context.get_input(0); + auto input_shape = context.mark_node(std::make_shared(input)); + + auto const_0 = v0::Constant::create(element::i64, Shape{1}, {0}); + auto const_1 = v0::Constant::create(element::i64, Shape{1}, {1}); + bool is_static = input.get_partial_shape().rank().is_static(); + bool no_batch_dim = is_static && input.get_partial_shape().rank().get_length() == dims + 1; + + if (is_static) { + if (no_batch_dim) { + input = context.mark_node(std::make_shared(input, const_0)); + } + } else { + input = context.mark_node(std::make_shared(input, const_0)); + auto unsqueeze_shape = context.mark_node(std::make_shared(input)); + auto rank = context.mark_node(std::make_shared(unsqueeze_shape)); + auto end_index = context.mark_node(std::make_shared(rank, const_1)); + auto start_index = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims - 2})); + auto reshape_pattern = + context.mark_node(std::make_shared(unsqueeze_shape, start_index, end_index, const_1, const_0)); + input = context.mark_node(std::make_shared(input, reshape_pattern, true)); + } + auto kernel = context.const_input(1); Strides strides; if (!context.input_is_none(2)) { @@ -47,8 +73,43 @@ OutputVector translate_avg_poolnd(const NodeContext& context) { } PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(6), "Translation for aten::avg_pool2d do not support divisor_override input."); - return {context.mark_node( - std::make_shared(input, strides, pads, pads, kernel, !count_include_pad, rounding_type))}; + auto res = context.mark_node( + std::make_shared(input, strides, pads, pads, kernel, !count_include_pad, rounding_type)); + + if (is_static) { + if (no_batch_dim) { + res = context.mark_node(std::make_shared(res, const_0)); + } + } else { + auto 
pooled_output_shape = context.mark_node(std::make_shared(res)); + + auto start_index_input = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto slice_input_shape = + context.mark_node(std::make_shared(input_shape, const_0, start_index_input, const_1, const_0)); + + auto start_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto end_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {2 + dims})); + auto slice_pooled_output_shape = context.mark_node( + std::make_shared(pooled_output_shape, start_index_pooled, end_index_pooled, const_1, const_0)); + + auto concat_shape = context.mark_node( + std::make_shared(OutputVector{slice_input_shape, slice_pooled_output_shape}, 0)); + res = context.mark_node(std::make_shared(res, concat_shape, true)); + } + + return {res}; +}; + +OutputVector translate_avg_pool1d(const NodeContext& context) { + return translate_avg_pool_base(context, 1); +}; + +OutputVector translate_avg_pool2d(const NodeContext& context) { + return translate_avg_pool_base(context, 2); +}; + +OutputVector translate_avg_pool3d(const NodeContext& context) { + return translate_avg_pool_base(context, 3); }; } // namespace op diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp index 4ae2c4ebc81af4..d4f12cae258ad8 100644 --- a/src/frontends/pytorch/src/op/cat.cpp +++ b/src/frontends/pytorch/src/op/cat.cpp @@ -146,9 +146,17 @@ OutputVector translate_stack_fx(const NodeContext& context) { num_elements -= 1; } + OutputVector stack_inputs; for (size_t i = 0; i < num_elements; i++) { - auto stack_input = - context.mark_node(std::make_shared(context.get_input(static_cast(i)), dim)); + stack_inputs.push_back(context.get_input(static_cast(i))); + } + + // returns the u4 constant if the stack operation is a part of the decompression pattern + if (const auto& u4_const = u4_compression_stack(stack_inputs, axis)) + return {u4_const}; + + for 
(size_t i = 0; i < num_elements; i++) { + auto stack_input = context.mark_node(std::make_shared(stack_inputs[i], dim)); list_elems.push_back(stack_input); } return translate_cat_common(context, list_elems, axis, true); diff --git a/src/frontends/pytorch/src/op/index.cpp b/src/frontends/pytorch/src/op/index.cpp index a1e286cad93adc..880e0acee0f983 100644 --- a/src/frontends/pytorch/src/op/index.cpp +++ b/src/frontends/pytorch/src/op/index.cpp @@ -26,191 +26,6 @@ namespace op { using namespace ov::op; -namespace { -Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis) { - // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of - // input tensor. The last dimension is the product of the rest of input tensor dimensions: - // [d_{axis}, ..., d_n] - Output output_shape; - if (axis == 0) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {1, -1}); - } else if (axis == 1) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {0, -1}); - } else { - const auto value_shape = rg.make(value, element::i32); - const auto value_rank = rg.make(value_shape, element::i32); - const auto axis_node = v0::Constant::create(element::i32, Shape{1}, {axis}); - auto start = v0::Constant::create(element::i32, Shape{1}, {0}); - auto step = v0::Constant::create(element::i32, Shape{1}, {1}); - const auto first_part_dims = rg.make(value_shape, start, axis_node, step); - auto zero = v0::Constant::create(element::i32, {}, {0}); - auto first_part_dims_length = rg.make(first_part_dims, zero, true); - - auto remaining_part_length = v0::Constant::create(element::i32, {1}, {-1}); - - output_shape = rg.make(OutputVector{first_part_dims_length, remaining_part_length}, 0); - } - return rg.make(value, output_shape, true); -} - -OutputVector index_on_list(ov::pass::NodeRegistry& rg, - const Output& data, - std::deque> ids, - int64_t rank) { - // Multiple tensors as indices. Each tensor could either be - // 1. 
prim::Constant() - // representing ":" in python indexing. E.g. tensor[:, :] - // 2. prim::Constant[value=...] or tensor output - // representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. - // For more info on advanced indexing, - // check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing - - // Consider a general case of - // t: [x_1, y_1, y_2, ..., x_m, ..., y_n] - // where t is a tensor of rank m+n, {x_i} are axes where tensor index is provided, and {y_i} are axes for - // ":". Same results can be achieved through transposing t into - // t: [x_1, x_2, ..., x_m, y_1, y_2, ..., y_n] - // and use gather - // t: [x_1 * x_2 * ... * x_m, y_1 * y_2 * ... * y_n] - // tensor index = \sum_{i=1}^m (ind_i * \prod_{j=i+1}^m (x_j)) - // After gather, reshape and transpose back. - std::vector advanced_ids; - std::vector is_masked_bool; - OutputVector masked_indicies; - // for case when index is bool e.g. x[x>0], replace index with non_zero - for (size_t i = 0; i < ids.size(); i++) { - // skip dimensions where index is None - bool is_none = false; - if (!ids[i].get_node_shared_ptr()) { - is_none = true; - } - if (auto const_input = cast_fw_node(ids[i].get_node_shared_ptr(), "prim::Constant")) { - const auto& attrs = const_input->get_attrs(); - if (attrs.find("none_value") != attrs.end()) { - is_none = true; - } - } - if (is_none) { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - continue; - } - auto id_dtype = ids[i].get_element_type(); - if (id_dtype == element::boolean || id_dtype == element::u8) { - auto idx = rg.make(ids[i], element::u8); - auto nonzero = rg.make(idx, element::i32); - auto input_order = v0::Constant::create(element::i32, Shape{2}, {1, 0}); - auto masked_id = rg.make(nonzero, input_order); - masked_indicies.push_back(masked_id); - is_masked_bool.push_back(true); - } else { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - } - advanced_ids.push_back(i); - } - - // all 
indicies prim::Constant(None), return input as is - if (advanced_ids.size() == 0) { - return {data}; - } - // perform gather for single element case - if (advanced_ids.size() == 1) { - auto index = masked_indicies[advanced_ids[0]]; - if (is_masked_bool[advanced_ids[0]]) { - auto gather = rg.make(data, index); - return {gather}; - } - index = rg.make(index, element::i32); - auto dim = v0::Constant::create(element::i32, Shape{}, {advanced_ids[0]}); - auto gather = rg.make(data, index, dim); - return {gather}; - } - auto adv_idx_count = advanced_ids.size(); - auto input_shape = rg.make(data, element::i32); - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - auto input_dims = rg.make(input_shape, zero, rank); - std::vector non_used_dims; - for (auto i = 0; i < rank; i++) { - if (std::find(advanced_ids.begin(), advanced_ids.end(), i) == advanced_ids.end()) { - non_used_dims.push_back(i); - } - } - std::vector permutation_dims; - permutation_dims.insert(permutation_dims.end(), advanced_ids.begin(), advanced_ids.end()); - permutation_dims.insert(permutation_dims.end(), non_used_dims.begin(), non_used_dims.end()); - auto transpose_dims = v0::Constant::create(element::i32, Shape{permutation_dims.size()}, permutation_dims); - auto transposed_input = rg.make(data, transpose_dims); - auto flatten_input = flatten(rg, transposed_input, adv_idx_count); - auto cum_adv_index = masked_indicies[advanced_ids.back()]; - cum_adv_index = rg.make(cum_adv_index, element::i32); - auto multiplier = input_dims->output(advanced_ids.back()); - for (int i = static_cast(adv_idx_count) - 2; i > -1; i--) { - auto input_id = advanced_ids[i]; - auto m_idx = rg.make(masked_indicies[input_id], element::i32); - auto adv_index = rg.make(m_idx, multiplier); - cum_adv_index = rg.make(cum_adv_index, adv_index); - multiplier = rg.make(multiplier, input_dims->output(input_id)); - } - std::shared_ptr gather = rg.make(flatten_input, cum_adv_index, zero); - OutputVector concat_dims; - // check if 
all advanced indices are consecutive. - std::vector consequence_dims; - auto cum_adv_index_shape_tensor = rg.make(cum_adv_index, element::i32); - for (size_t i = advanced_ids[0]; i <= advanced_ids[advanced_ids.back()]; i++) { - consequence_dims.push_back(i); - } - // unfold regular index axes - if (advanced_ids == consequence_dims) { - OutputVector folded_adv_idx_shape_vector; - auto minus_one = v0::Constant::create(element::i32, Shape{1}, {-1}); - folded_adv_idx_shape_vector.push_back(minus_one); - for (auto i : non_used_dims) { - folded_adv_idx_shape_vector.push_back(input_dims->output(i)); - } - auto folded_adv_idx_shape = rg.make(folded_adv_idx_shape_vector, 0); - gather = rg.make(gather, folded_adv_idx_shape, false); - std::vector adv_idx_permute; - for (size_t i = 1; i < advanced_ids[0] + 1; i++) { - adv_idx_permute.push_back(i); - } - adv_idx_permute.push_back(0); - for (size_t i = advanced_ids[0] + 1; i < (rank - adv_idx_count + 1); i++) { - adv_idx_permute.push_back(i); - } - // Transpose folded advanced indexed axis to its original location. 
- auto permute_indicies = v0::Constant::create(element::i32, Shape{adv_idx_permute.size()}, adv_idx_permute); - gather = rg.make(gather, permute_indicies); - // unfold advanced index axes - for (size_t i = 0; i < advanced_ids[0]; i++) { - concat_dims.push_back(input_dims->output(i)); - } - concat_dims.push_back(cum_adv_index_shape_tensor); - for (auto i : non_used_dims) { - if (i < advanced_ids[0]) { - continue; - } - concat_dims.push_back(input_dims->output(i)); - } - - } else { - size_t i = 0; - auto one = v0::Constant::create(element::i32, Shape{1}, {1}); - while (i < non_used_dims.size() && non_used_dims[i] < advanced_ids[0]) { - concat_dims.push_back(one); - i++; - } - concat_dims.push_back(cum_adv_index_shape_tensor); - for (; i < non_used_dims.size(); i++) { - concat_dims.push_back(input_dims->output(non_used_dims[i])); - } - } - auto final_shape = rg.make(concat_dims, 0); - gather = rg.make(gather, final_shape, false); - return {gather}; -} -} // namespace - OutputVector translate_index(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); @@ -225,9 +40,12 @@ OutputVector translate_index(const NodeContext& context) { auto rank = x.get_partial_shape().rank(); // index transformation supports only tensors with static rank PYTORCH_OP_CONVERSION_CHECK(rank.is_static(), "Dynamic rank for aten::index input is not supported."); - auto res = index_on_list(rg, x, list_elems, rank.get_length()); + OutputVector ids{list_elems.begin(), list_elems.end()}; + ov::Output res; + bool use_input_as_output = true; + index_tensor_on_list(rg, x, ids, rank.get_length(), res, use_input_as_output); context.mark_nodes(rg.get()); - return res; + return {res}; } auto index_ov_type = indices.get_element_type(); if (index_ov_type.is_dynamic()) { @@ -267,9 +85,13 @@ OutputVector translate_index_fx(const NodeContext& context) { } // index transformation supports only tensors with static rank PYTORCH_OP_CONVERSION_CHECK(rank.is_static(), "Dynamic 
rank for aten::index input is not supported."); - auto res = index_on_list(rg, x, list_elems, rank.get_length()); + + OutputVector ids{list_elems.begin(), list_elems.end()}; + ov::Output res; + bool use_input_as_output = true; + index_tensor_on_list(rg, x, ids, rank, res, use_input_as_output); context.mark_nodes(rg.get()); - return res; + return {res}; }; } // namespace op diff --git a/src/frontends/pytorch/src/op/lerp.cpp b/src/frontends/pytorch/src/op/lerp.cpp new file mode 100644 index 00000000000000..67922da3e4578d --- /dev/null +++ b/src/frontends/pytorch/src/op/lerp.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_lerp(const NodeContext& context) { + // Tensor = aten::lerp(%lhs.1, %rhs.1, %self.weight) + num_inputs_check(context, 3, 3); + Output start; + Output end; + std::tie(start, end) = get_inputs_with_promoted_types(context, 0, 1); + + Output weight = context.get_input(2); + auto scale = context.mark_node(std::make_shared(end, start)); + weight = context.mark_node(std::make_shared(weight, scale)); + auto delta = context.mark_node(std::make_shared(scale, weight)); + return {context.mark_node(std::make_shared(start, delta))}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/max_poolnd.cpp b/src/frontends/pytorch/src/op/max_poolnd.cpp index b6a01af1a7c2df..b846de68d28b49 100644 --- a/src/frontends/pytorch/src/op/max_poolnd.cpp +++ b/src/frontends/pytorch/src/op/max_poolnd.cpp @@ -12,9 +12,13 @@ #include "openvino/op/multiply.hpp" #include "openvino/op/pad.hpp" 
#include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/squeeze.hpp" #include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" #include "openvino/op/util/framework_node.hpp" #include "utils.hpp" @@ -24,9 +28,31 @@ namespace pytorch { namespace op { using namespace ov::op; - -OutputVector translate_max_poolnd(const NodeContext& context) { +OutputVector translate_max_pool_base(const NodeContext& context, int dims) { num_inputs_check(context, 3, 6); + auto input = context.get_input(0); + auto input_shape = context.mark_node(std::make_shared(input)); + + auto const_0 = v0::Constant::create(element::i64, Shape{1}, {0}); + auto const_1 = v0::Constant::create(element::i64, Shape{1}, {1}); + bool is_static = input.get_partial_shape().rank().is_static(); + bool no_batch_dim = is_static && input.get_partial_shape().rank().get_length() == dims + 1; + + if (is_static) { + if (no_batch_dim) { + input = context.mark_node(std::make_shared(input, const_0)); + } + } else { + input = context.mark_node(std::make_shared(input, const_0)); + auto unsqueeze_shape = context.mark_node(std::make_shared(input)); + auto rank = context.mark_node(std::make_shared(unsqueeze_shape)); + auto end_index = context.mark_node(std::make_shared(rank, const_1)); + auto start_index = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims - 2})); + auto reshape_pattern = + context.mark_node(std::make_shared(unsqueeze_shape, start_index, end_index, const_1, const_0)); + input = context.mark_node(std::make_shared(input, reshape_pattern, true)); + } + auto kernel = context.const_input(1); Strides strides; if (!context.input_is_none(2)) { @@ -53,7 +79,7 @@ OutputVector translate_max_poolnd(const NodeContext& context) { rounding_type = context.const_input(5) ? 
RoundingType::CEIL_TORCH : RoundingType::FLOOR; } - auto res = context.mark_node(std::make_shared(context.get_input(0), + auto res = context.mark_node(std::make_shared(input, strides, dilations, pads, @@ -63,19 +89,76 @@ OutputVector translate_max_poolnd(const NodeContext& context) { PadType::EXPLICIT, element::i64, 2)); - if (context.get_output_size() == 2) { - auto out1 = res->output(0); - auto out2 = res->output(1); - return {std::move(out1), std::move(out2)}; + if (is_static) { + if (no_batch_dim) { + if (context.get_output_size() == 2) { + auto out1 = res->output(0); + auto out2 = res->output(1); + out1 = context.mark_node(std::make_shared(out1, const_0)); + out2 = context.mark_node(std::make_shared(out2, const_0)); + return {std::move(out1), std::move(out2)}; + } else { + res = context.mark_node(std::make_shared(res, const_0)); + return {res}; + } + } else { + if (context.get_output_size() == 2) { + auto out1 = res->output(0); + auto out2 = res->output(1); + return {std::move(out1), std::move(out2)}; + } else { + return {res}; + } + } + } else { - return {res}; + auto pooled_output_shape = context.mark_node(std::make_shared(res)); + + auto start_index_input = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto slice_input_shape = + context.mark_node(std::make_shared(input_shape, const_0, start_index_input, const_1, const_0)); + + auto start_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto end_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {2 + dims})); + auto slice_pooled_output_shape = context.mark_node( + std::make_shared(pooled_output_shape, start_index_pooled, end_index_pooled, const_1, const_0)); + + auto concat_shape = context.mark_node( + std::make_shared(OutputVector{slice_input_shape, slice_pooled_output_shape}, 0)); + if (context.get_output_size() == 2) { + auto out1 = res->output(0); + auto out2 = res->output(1); + out1 = 
context.mark_node(std::make_shared(out1, concat_shape, true)); + out2 = context.mark_node(std::make_shared(out2, concat_shape, true)); + return {std::move(out1), std::move(out2)}; + } else { + res = context.mark_node(std::make_shared(res, concat_shape, true)); + return {res}; + } } }; -OutputVector translate_max_poolnd_fx(const NodeContext& context) { - auto output = translate_max_poolnd(context); +OutputVector translate_max_pool1d(const NodeContext& context) { + return translate_max_pool_base(context, 1); +}; + +OutputVector translate_max_pool2d(const NodeContext& context) { + return translate_max_pool_base(context, 2); +}; + +OutputVector translate_max_pool3d(const NodeContext& context) { + return translate_max_pool_base(context, 3); +}; + +OutputVector translate_max_pool2d_fx(const NodeContext& context) { + auto output = translate_max_pool2d(context); return {context.mark_node(make_list_construct(output))}; -} +}; + +OutputVector translate_max_pool3d_fx(const NodeContext& context) { + auto output = translate_max_pool3d(context); + return {context.mark_node(make_list_construct(output))}; +}; } // namespace op } // namespace pytorch diff --git a/src/frontends/pytorch/src/op/search_sorted.cpp b/src/frontends/pytorch/src/op/search_sorted.cpp new file mode 100644 index 00000000000000..ca9f6b49ff7bf9 --- /dev/null +++ b/src/frontends/pytorch/src/op/search_sorted.cpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/search_sorted.hpp" + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_search_sorted(const NodeContext& context) { + num_inputs_check(context, 2, 5); + Output sorted; + Output values; + std::tie(sorted, values) = get_inputs_with_promoted_types(context, 0, 1); + const bool out_int32 = context.const_input(2); + 
PYTORCH_OP_CONVERSION_CHECK(out_int32 == false, "aten::searchsorted(out_int32=true) unsupported"); + const bool right_mode = context.const_input(3); + PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(4), "aten::searchsorted(side) unsupported"); + PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(5), "aten::searchsorted(out) unsupported"); + PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(6), "aten::searchsorted(sorter) unsupported"); + auto op = context.mark_node(std::make_shared(sorted, values, right_mode)); + return {op}; +}; +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/pytorch/src/op/stft.cpp b/src/frontends/pytorch/src/op/stft.cpp new file mode 100644 index 00000000000000..b7e4858c2f8fcc --- /dev/null +++ b/src/frontends/pytorch/src/op/stft.cpp @@ -0,0 +1,93 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/stft.hpp" + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_stft(const NodeContext& context) { + // schema: aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool + // normalized=False, bool? onesided=None, bool? 
return_complex=None) -> Tensor + // + // Note: aten::stft doesn't have "center" and "pad_mode" attrs like torch.stft, so the number of the inputs is lower + // and index of any input after the "window" is smaller accordingly + + num_inputs_check(context, 2, 8); + + auto input = context.get_input(0); + auto n_fft = context.get_input(1); + + ov::Output hop_length; + if (!context.input_is_none(2)) { + hop_length = context.get_input(2); + } else { + // Defualt floor(n_fft / 4) + const auto four = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 4)); + const auto four_cast = context.mark_node(std::make_shared(four, n_fft)); + hop_length = context.mark_node(std::make_shared(n_fft, four_cast)); + } + + ov::Output win_length; + if (!context.input_is_none(3)) { + win_length = context.get_input(3); + } else { + win_length = n_fft; + } + + ov::Output window; + if (!context.input_is_none(4)) { + window = context.get_input(4); + } else { + const auto one = context.mark_node(std::make_shared(ov::element::i32, Shape{}, 1)); + const auto one_cast = context.mark_node(std::make_shared(one, input)); + const auto zero = context.mark_node(std::make_shared(ov::element::i32, Shape{1}, 0)); + const auto win_length_cast = + context.mark_node(std::make_shared(win_length, ov::element::i64)); + const auto win_len_vec = context.mark_node(std::make_shared(win_length_cast, zero)); + window = context.mark_node(std::make_shared(one_cast, win_len_vec)); + } + + bool normalized = false; + if (!context.input_is_none(5)) { + normalized = context.const_input(5); + } + PYTORCH_OP_CONVERSION_CHECK(!normalized, + "aten::stft conversion is currently supported with normalized=False only."); + + bool onesided = true; + if (!context.input_is_none(6)) { + onesided = context.const_input(6); + } + PYTORCH_OP_CONVERSION_CHECK(onesided, "aten::stft conversion is currently supported with onesided=True only."); + + bool return_complex = false; + if (!context.input_is_none(7)) { + return_complex = 
context.const_input(7); + } + PYTORCH_OP_CONVERSION_CHECK(!return_complex, + "aten::stft conversion is currently supported with return_complex=False only."); + + // Perform STFT + constexpr bool transpose_frames = true; + auto stft = context.mark_node(std::make_shared(input, window, n_fft, hop_length, transpose_frames)); + return {stft}; +}; +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index b68c182e17ee2a..607f0bd32db80d 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -42,7 +42,10 @@ OP_CONVERTER(translate_argmax); OP_CONVERTER(translate_argmin); OP_CONVERTER(translate_as_strided); OP_CONVERTER(translate_as_tensor); -OP_CONVERTER(translate_avg_poolnd); +OP_CONVERTER(translate_atan2); +OP_CONVERTER(translate_avg_pool1d); +OP_CONVERTER(translate_avg_pool2d); +OP_CONVERTER(translate_avg_pool3d); OP_CONVERTER(translate_bool); OP_CONVERTER(translate_batch_norm); OP_CONVERTER(translate_bitwise_and); @@ -118,6 +121,7 @@ OP_CONVERTER(translate_inverse); OP_CONVERTER(translate_is_nonzero); OP_CONVERTER(translate_layer_norm); OP_CONVERTER(translate_len); +OP_CONVERTER(translate_lerp); OP_CONVERTER(translate_linalg_cross); OP_CONVERTER(translate_linalg_norm); OP_CONVERTER(translate_linalg_matrix_norm); @@ -139,7 +143,9 @@ OP_CONVERTER(translate_masked_scatter); OP_CONVERTER(translate_masked_select); OP_CONVERTER(translate_max); OP_CONVERTER(translate_maximum); -OP_CONVERTER(translate_max_poolnd); +OP_CONVERTER(translate_max_pool1d); +OP_CONVERTER(translate_max_pool2d); +OP_CONVERTER(translate_max_pool3d); OP_CONVERTER(translate_mean); OP_CONVERTER(translate_meshgrid); OP_CONVERTER(translate_min); @@ -215,6 +221,7 @@ OP_CONVERTER(translate_square); OP_CONVERTER(translate_squeeze); OP_CONVERTER(translate_std); OP_CONVERTER(translate_std_mean); +OP_CONVERTER(translate_stft); 
OP_CONVERTER(translate_sub); OP_CONVERTER(translate_sub_); OP_CONVERTER(translate_sum); @@ -281,7 +288,8 @@ OP_CONVERTER(translate_leaky_relu_fx); OP_CONVERTER(translate_log_sigmoid_fx); OP_CONVERTER(translate_log_softmax_fx); OP_CONVERTER(translate_max_dim_fx); -OP_CONVERTER(translate_max_poolnd_fx); +OP_CONVERTER(translate_max_pool2d_fx); +OP_CONVERTER(translate_max_pool3d_fx); OP_CONVERTER(translate_mean_fx); OP_CONVERTER(translate_min_dim_fx); OP_CONVERTER(translate_new_full_fx); @@ -294,6 +302,7 @@ OP_CONVERTER(translate_reshape_fx); OP_CONVERTER(translate_rsub_fx); OP_CONVERTER(translate_scalar_tensor_fx); OP_CONVERTER(translate_scaled_dot_product_attention_fx); +OP_CONVERTER(translate_search_sorted); OP_CONVERTER(translate_select_scatter_fx); OP_CONVERTER(translate_slice_fx); OP_CONVERTER(translate_slice_scatter_fx); @@ -322,12 +331,15 @@ OP_CONVERTER(translate_zeros_like_fx); const std::unordered_map get_supported_ops_ts() { return { {"aten::__and__", op::translate_bitwise_and}, + {"aten::__iand__", op::inplace_op}, {"aten::__derive_index", op::translate_derive_index}, {"aten::__getitem__", op::translate_getitem}, {"aten::__not__", op::translate_1to1_match_1_inputs}, {"aten::__or__", op::translate_bitwise_or}, + {"aten::__ior__", op::inplace_op}, {"aten::__range_length", op::translate_range_length}, {"aten::__xor__", op::translate_bitwise_xor}, + {"aten::__ixor__", op::inplace_op}, {"aten::_convolution", op::translate_convolution}, {"aten::_convolution_mode", op::translate_convolution_mode}, {"aten::_native_multi_head_attention", op::translate_native_multi_head_attention}, @@ -380,9 +392,10 @@ const std::unordered_map get_supported_ops_ts() { {"aten::atanh", op::optional_out, 1>}, {"aten::atanh_", op::inplace_op>}, - {"aten::avg_pool1d", op::quantizable_op}, - {"aten::avg_pool2d", op::quantizable_op}, - {"aten::avg_pool3d", op::quantizable_op}, + {"aten::atan2", op::translate_atan2}, + {"aten::avg_pool1d", op::quantizable_op}, + {"aten::avg_pool2d", 
op::quantizable_op}, + {"aten::avg_pool3d", op::quantizable_op}, {"aten::baddbmm", op::translate_addmm}, {"aten::batch_norm", op::translate_batch_norm}, {"aten::bitwise_and", op::translate_bitwise_and}, @@ -500,6 +513,7 @@ const std::unordered_map get_supported_ops_ts() { {"aten::le", op::translate_1to1_match_2_inputs_align_types}, {"aten::leaky_relu", op::translate_1to1_match_2_inputs}, {"aten::len", op::translate_len}, + {"aten::lerp", op::translate_lerp}, // lift op is torchscript specific op responsible for tensors coping with guarantee of new memory allocation {"aten::lift", op::skip_node}, {"aten::lift_fresh", op::skip_node}, @@ -534,12 +548,12 @@ const std::unordered_map get_supported_ops_ts() { {"aten::max", op::translate_max}, {"aten::mv", op::translate_1to1_match_2_inputs}, {"aten::maximum", op::translate_maximum}, - {"aten::max_pool1d", op::quantizable_op}, - {"aten::max_pool1d_with_indices", op::quantizable_op}, - {"aten::max_pool2d", op::quantizable_op}, - {"aten::max_pool2d_with_indices", op::quantizable_op}, - {"aten::max_pool3d", op::quantizable_op}, - {"aten::max_pool3d_with_indices", op::quantizable_op}, + {"aten::max_pool1d", op::quantizable_op}, + {"aten::max_pool1d_with_indices", op::quantizable_op}, + {"aten::max_pool2d", op::quantizable_op}, + {"aten::max_pool2d_with_indices", op::quantizable_op}, + {"aten::max_pool3d", op::quantizable_op}, + {"aten::max_pool3d_with_indices", op::quantizable_op}, {"aten::mean", op::quantizable_op}, {"aten::meshgrid", op::translate_meshgrid}, {"aten::min", op::translate_min}, @@ -610,6 +624,7 @@ const std::unordered_map get_supported_ops_ts() { {"aten::rsqrt", op::optional_out}, {"aten::rsqrt_", op::inplace_op}, {"aten::rsub", op::translate_rsub}, + {"aten::searchsorted", op::translate_search_sorted}, {"aten::ScalarImplicit", op::skip_node}, {"aten::scaled_dot_product_attention", op::translate_scaled_dot_product_attention}, {"aten::scatter", op::translate_scatter}, @@ -640,6 +655,7 @@ const std::unordered_map 
get_supported_ops_ts() { // aten::stack - Supported in limited set of patterns {"aten::std", op::translate_std}, {"aten::std_mean", op::translate_std_mean}, + {"aten::stft", op::translate_stft}, {"aten::sub", op::translate_sub}, {"aten::sub_", op::translate_sub_}, {"aten::sum", op::translate_sum}, @@ -737,6 +753,7 @@ const std::unordered_map get_supported_ops_fx() { {"aten._native_batch_norm_legit.no_stats", op::translate_batch_norm_legit_no_stats_fx}, {"aten._native_batch_norm_legit_functional.default", op::translate_batch_norm_legit_fx}, {"aten._native_batch_norm_legit_no_training.default", op::translate_batch_norm_legit_no_training_fx}, + {"aten._safe_softmax.default", op::translate_softmax_fx}, {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx}, {"aten._scaled_dot_product_flash_attention_for_cpu.default", op::translate_scaled_dot_product_attention_fx}, {"aten._softmax.default", op::translate_softmax_fx}, @@ -771,8 +788,9 @@ const std::unordered_map get_supported_ops_fx() { {"aten.asinh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.atan.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.atanh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, - {"aten.avg_pool2d.default", op::translate_avg_poolnd}, - {"aten.avg_pool3d.default", op::translate_avg_poolnd}, + {"aten.atan2.default", op::translate_atan2}, + {"aten.avg_pool2d.default", op::translate_avg_pool2d}, + {"aten.avg_pool3d.default", op::translate_avg_pool3d}, {"aten.baddbmm.default", op::translate_addmm_fx}, {"aten.bitwise_and.Scalar", op::translate_bitwise_and}, {"aten.bitwise_and.Tensor", op::translate_bitwise_and}, @@ -870,8 +888,8 @@ const std::unordered_map get_supported_ops_fx() { {"aten.masked_fill_.Tensor", op::inplace_op}, {"aten.max.default", op::translate_max}, {"aten.max.dim", op::translate_max_dim_fx}, - {"aten.max_pool2d_with_indices.default", 
op::translate_max_poolnd_fx}, - {"aten.max_pool3d_with_indices.default", op::translate_max_poolnd_fx}, + {"aten.max_pool2d_with_indices.default", op::translate_max_pool2d_fx}, + {"aten.max_pool3d_with_indices.default", op::translate_max_pool3d_fx}, {"aten.maximum.default", op::translate_maximum}, {"aten.mean.default", op::translate_mean_fx}, {"aten.mean.dim", op::translate_mean_fx}, diff --git a/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp index 39a9bc710ca08d..9294409a565691 100644 --- a/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp @@ -34,34 +34,6 @@ namespace pass { using namespace ov::op; -namespace { -Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis) { - // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of - // input tensor. The last dimension is the product of the rest of input tensor dimensions: - // [d_{axis}, ..., d_n] - Output output_shape; - if (axis == 0) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {1, -1}); - } else if (axis == 1) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {0, -1}); - } else { - const auto value_shape = rg.make(value, element::i32); - const auto value_rank = rg.make(value_shape, element::i32); - const auto axis_node = v0::Constant::create(element::i32, Shape{1}, {axis}); - auto start = v0::Constant::create(element::i32, Shape{1}, {0}); - auto step = v0::Constant::create(element::i32, Shape{1}, {1}); - const auto first_part_dims = rg.make(value_shape, start, axis_node, step); - auto zero = v0::Constant::create(element::i32, {}, {0}); - auto first_part_dims_length = rg.make(first_part_dims, zero, true); - - auto remaining_part_length = v0::Constant::create(element::i32, {1}, {-1}); - - output_shape = rg.make(OutputVector{first_part_dims_length, remaining_part_length}, 0); - } 
- return rg.make(value, output_shape, true); -} -}; // namespace - AtenIndexToSelect::AtenIndexToSelect() { auto index_op = ov::pass::pattern::wrap_type(); @@ -75,162 +47,22 @@ AtenIndexToSelect::AtenIndexToSelect() { auto indicies = index_op->input_value(1).get_node_shared_ptr(); auto list_indicies = cast_fw_node(indicies, "prim::ListConstruct"); if (list_indicies) { - // Multiple tensors as indices. Each tensor could either be - // 1. prim::Constant() - // representing ":" in python indexing. E.g. tensor[:, :] - // 2. prim::Constant[value=...] or tensor output - // representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. - // For more info on advanced indexing, - // check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing - - // Consider a general case of - // t: [x_1, y_1, y_2, ..., x_m, ..., y_n] - // where t is a tensor of rank m+n, {x_i} are axes where tensor index is provided, and {y_i} are axes for - // ":". Same results can be achieved through transposing t into - // t: [x_1, x_2, ..., x_m, y_1, y_2, ..., y_n] - // and use gather - // t: [x_1 * x_2 * ... * x_m, y_1 * y_2 * ... * y_n] - // tensor index = \sum_{i=1}^m (ind_i * \prod_{j=i+1}^m (x_j)) - // After gather, reshape and transpose back. auto ids = list_indicies->input_values(); - std::vector advanced_ids; - std::vector is_masked_bool; - OutputVector masked_indicies; - // for case when index is bool e.g. 
x[x>0], replace index with non_zero - for (size_t i = 0; i < ids.size(); i++) { - auto const_input = cast_fw_node(ids[i].get_node_shared_ptr(), "prim::Constant"); - - // skip dimensions where index is None - if (const_input) { - const auto& attrs = const_input->get_attrs(); - if (attrs.find("none_value") != attrs.end()) { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - continue; - } - } - auto id_dtype = ids[i].get_element_type(); - if (id_dtype == element::boolean || id_dtype == element::u8) { - auto idx = rg.make(ids[i], element::u8); - auto nonzero = rg.make(idx, element::i32); - auto input_order = v0::Constant::create(element::i32, Shape{2}, {1, 0}); - auto masked_id = rg.make(nonzero, input_order); - masked_indicies.push_back(masked_id); - is_masked_bool.push_back(true); - } else { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - } - advanced_ids.push_back(i); - } - - // all indicies prim::Constant(None), return input as is - if (advanced_ids.size() == 0) { - index_op->output(0).replace(index_op->get_input_source_output(0)); - return true; - } - // perform gather for single element case - if (advanced_ids.size() == 1) { - auto index = masked_indicies[advanced_ids[0]]; - if (is_masked_bool[advanced_ids[0]]) { - auto gather = rg.make(input_node, index); - copy_runtime_info_and_name(index_op, rg.get()); - replace_node(index_op, gather); - return true; - } - index = rg.make(index, element::i32); - auto dim = v0::Constant::create(element::i32, Shape{}, {advanced_ids[0]}); - auto gather = rg.make(input_node, index, dim); - copy_runtime_info_and_name(index_op, rg.get()); - replace_node(index_op, gather); - return true; - } - auto adv_idx_count = advanced_ids.size(); auto rank = input_node.get_partial_shape().rank(); // index transformation supports only tensors with static rank - if (rank.is_dynamic()) { + ov::Output new_output; + bool use_input_as_output = true; + if (!index_tensor_on_list(rg, input_node, ids, rank, 
new_output, use_input_as_output)) { add_exception_to_fw_node(index_op, "aten::index: dynamic rank for aten::index input is not supported."); return false; } - auto input_shape = rg.make(input_node, element::i32); - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - auto input_dims = rg.make(input_shape, zero, rank.get_length()); - std::vector non_used_dims; - for (auto i = 0; i < rank.get_length(); i++) { - if (std::find(advanced_ids.begin(), advanced_ids.end(), i) == advanced_ids.end()) { - non_used_dims.push_back(i); - } - } - std::vector permutation_dims; - permutation_dims.insert(permutation_dims.end(), advanced_ids.begin(), advanced_ids.end()); - permutation_dims.insert(permutation_dims.end(), non_used_dims.begin(), non_used_dims.end()); - auto transpose_dims = v0::Constant::create(element::i32, Shape{permutation_dims.size()}, permutation_dims); - auto transposed_input = rg.make(input_node, transpose_dims); - auto flatten_input = flatten(rg, transposed_input, adv_idx_count); - auto cum_adv_index = masked_indicies[advanced_ids[adv_idx_count - 1]]; - cum_adv_index = rg.make(cum_adv_index, element::i32); - auto multiplier = input_dims->output(advanced_ids[adv_idx_count - 1]); - for (int i = static_cast(adv_idx_count) - 2; i > -1; i--) { - auto input_id = advanced_ids[i]; - auto m_idx = rg.make(masked_indicies[input_id], element::i32); - auto adv_index = rg.make(m_idx, multiplier); - cum_adv_index = rg.make(cum_adv_index, adv_index); - multiplier = rg.make(multiplier, input_dims->output(input_id)); - } - std::shared_ptr gather = rg.make(flatten_input, cum_adv_index, zero); - OutputVector concat_dims; - // check if all advanced indices are consecutive. 
- std::vector consequence_dims; - auto cum_adv_index_shape_tensor = rg.make(cum_adv_index, element::i32); - for (size_t i = advanced_ids[0]; i <= advanced_ids[advanced_ids.size() - 1]; i++) { - consequence_dims.push_back(i); - } - // unfold regular index axes - if (advanced_ids == consequence_dims) { - OutputVector folded_adv_idx_shape_vector; - auto minus_one = v0::Constant::create(element::i32, Shape{1}, {-1}); - folded_adv_idx_shape_vector.push_back(minus_one); - for (auto i : non_used_dims) { - folded_adv_idx_shape_vector.push_back(input_dims->output(i)); - } - auto folded_adv_idx_shape = rg.make(folded_adv_idx_shape_vector, 0); - gather = rg.make(gather, folded_adv_idx_shape, false); - std::vector adv_idx_permute; - for (size_t i = 1; i < advanced_ids[0] + 1; i++) { - adv_idx_permute.push_back(i); - } - adv_idx_permute.push_back(0); - for (size_t i = advanced_ids[0] + 1; i < (rank.get_length() - adv_idx_count + 1); i++) { - adv_idx_permute.push_back(i); - } - // Transpose folded advanced indexed axis to its original location. 
- auto permute_indicies = - v0::Constant::create(element::i32, Shape{adv_idx_permute.size()}, adv_idx_permute); - gather = rg.make(gather, permute_indicies); - // unfold advanced index axes - for (size_t i = 0; i < advanced_ids[0]; i++) { - concat_dims.push_back(input_dims->output(i)); - } - concat_dims.push_back(cum_adv_index_shape_tensor); - for (auto i : non_used_dims) { - if (i < advanced_ids[0]) { - continue; - } - concat_dims.push_back(input_dims->output(i)); - } - - } else { - concat_dims.push_back(cum_adv_index_shape_tensor); - for (auto i : non_used_dims) { - concat_dims.push_back(input_dims->output(i)); - } + if (use_input_as_output) { + index_op->output(0).replace(index_op->get_input_source_output(0)); + return true; } - auto final_shape = rg.make(concat_dims, 0); - gather = rg.make(gather, final_shape, false); copy_runtime_info_and_name(index_op, rg.get()); - replace_node(index_op, gather); + replace_node(index_op, new_output.get_node_shared_ptr()); return true; - } else { auto const_input = cast_fw_node(indicies, "prim::Constant"); diff --git a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp index a533739b16fea1..caeeb8c557b380 100644 --- a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp @@ -40,18 +40,6 @@ uint32_t read_u4_data(const void* array, size_t index) { return val; }; -void write_u4_data(void* array, size_t index, uint32_t data) { - auto arr_u32 = reinterpret_cast(array); - size_t idx_u32 = index / 8; - size_t offset_u32 = index % 8; - uint32_t old_val = arr_u32[idx_u32]; - data = data << (offset_u32 * 4); - uint32_t mask = 15; - mask = ~(mask << (offset_u32 * 4)); - uint32_t new_val = (old_val & mask) | data; - arr_u32[idx_u32] = new_val; -}; - GPTQDecompressionReplacer::GPTQDecompressionReplacer() { const auto& const_1 = wrap_type(); const auto& 
const_2 = wrap_type(); @@ -73,61 +61,157 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { const auto& convert_2 = wrap_type({const_6}); const auto& bitwise_and = wrap_type({add_or_convert, convert_2}); - ov::matcher_pass_callback callback = [unsqueeze_1](Matcher& m) { + ov::matcher_pass_callback callback = [=](Matcher& m) { auto bitwise_and = m.get_match_root(); if (!bitwise_and) { return false; } const auto& pattern_map = m.get_pattern_value_map(); - const auto& input_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); - auto weights_u32 = std::dynamic_pointer_cast(input_node->get_input_node_shared_ptr(0)); - auto axis = std::dynamic_pointer_cast(input_node->get_input_node_shared_ptr(1)); - auto axis_data = axis->get_data_ptr(); - - auto u8_shape = weights_u32->get_shape(); - auto src = weights_u32->get_data_ptr(); - - ov::Shape u4_shape; - bool dim_added = false; - size_t stride = 1; - size_t size_y = 1; - for (size_t i = 0; i < u8_shape.size(); i++) { - if (axis_data[0] == i) { - u4_shape.push_back(8); - dim_added = true; - } - if (axis_data[0] <= i) { - stride *= u8_shape[i]; - } else { - size_y *= u8_shape[i]; - } - u4_shape.push_back(u8_shape[i]); + auto unsqueeze_1_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); + auto unsqueeze_1_in0_const = + std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(0)); + auto unsqueeze_1_in1_const = + std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(1)); + auto abs_node = pattern_map.at(abs).get_node_shared_ptr(); + auto abs_in_const = std::dynamic_pointer_cast(abs_node->get_input_node_shared_ptr(0)); + auto broadcast_node = pattern_map.at(broadcast).get_node_shared_ptr(); + auto unsqueeze_2_node = pattern_map.at(unsqueeze_2).get_node_shared_ptr(); + auto unsqueeze_2_in0_const = + std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(0)); + auto unsqueeze_2_in1_const = + std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(1)); + 
+ OutputVector outputs_1(unsqueeze_1_node->get_output_size()); + OutputVector unsqueeze_1_inputs(2); + unsqueeze_1_inputs[0] = unsqueeze_1_in0_const->outputs()[0]; + unsqueeze_1_inputs[1] = unsqueeze_1_in1_const->outputs()[0]; + if (!unsqueeze_1_node->constant_fold(outputs_1, unsqueeze_1_inputs)) { + return false; } - if (!dim_added) { - u4_shape.push_back(8); + + OutputVector outputs_2(abs_node->get_output_size()); + if (!abs_node->constant_fold(outputs_2, abs_in_const->outputs())) { + return false; } - auto new_const = std::make_shared(element::u4, u4_shape); - auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr())); + OutputVector outputs_3(broadcast_node->get_output_size()); + OutputVector broadcast_inputs(2); + broadcast_inputs[0] = outputs_1[0]; + broadcast_inputs[1] = outputs_2[0]; + if (!broadcast_node->constant_fold(outputs_3, broadcast_inputs)) { + return false; + } + + OutputVector outputs_4(unsqueeze_2_node->get_output_size()); + OutputVector unsqueeze_2_inputs(2); + unsqueeze_2_inputs[0] = unsqueeze_2_in0_const->outputs()[0]; + unsqueeze_2_inputs[1] = unsqueeze_2_in1_const->outputs()[0]; + if (!unsqueeze_2_node->constant_fold(outputs_4, unsqueeze_2_inputs)) { + return false; + } + const int32_t* rs_in0 = + std::dynamic_pointer_cast(outputs_3[0].get_node_shared_ptr())->get_data_ptr(); + const int32_t* rs_in1 = + std::dynamic_pointer_cast(outputs_4[0].get_node_shared_ptr())->get_data_ptr(); + auto shifted_const = std::make_shared(element::i32, outputs_3[0].get_shape()); + auto dst = const_cast(reinterpret_cast(shifted_const->get_data_ptr())); if (!dst) return false; - size_t in_idx = 0; - for (size_t y = 0; y < size_y; y++) { - size_t offset = y * stride * 8; - for (size_t x = 0; x < stride; x++) { - for (size_t z = 0; z < 8; z++) { - uint32_t val = read_u4_data(src, in_idx); - write_u4_data(dst, (offset + x + stride * z), val); - in_idx++; - } + // TODO: Bitwise right shift operation below might need to be + // optimized to reduce FIL. 
+ size_t rs_in0_shape_size = shape_size(outputs_3[0].get_shape()); + const auto& rs_in0_shape = outputs_3[0].get_shape(); + const auto& rs_in1_shape = outputs_4[0].get_shape(); + int shift_dim = -1; + size_t shift_offset = 1; + for (size_t i = 0; i < rs_in1_shape.size(); ++i) { + size_t dim = rs_in1_shape[i]; + if (dim != 1 && dim != rs_in0_shape[i]) { + return false; + } + if (shift_dim != -1) { + shift_offset *= rs_in0_shape[i]; + } + if (dim == rs_in0_shape[i]) { + shift_dim = static_cast(i); + } + } + if (shift_dim == -1) + return false; + for (size_t k = 0; k < rs_in0_shape_size; ++k) { + size_t shift_idx = (k / shift_offset) % rs_in1_shape[shift_dim]; + int32_t shift_val = rs_in1[shift_idx]; + dst[k] = (rs_in0[k] >> shift_val); + } + + std::shared_ptr convert_1_node = nullptr; + OutputVector outputs_7; + if (pattern_map.find(convert_1) != pattern_map.end()) { + convert_1_node = pattern_map.at(convert_1).get_node_shared_ptr(); + outputs_7.resize(convert_1_node->get_output_size()); + if (!convert_1_node->constant_fold(outputs_7, shifted_const->outputs())) { + return false; + } + } else { + auto convert_3_node = pattern_map.at(convert_3).get_node_shared_ptr(); + auto convert_4_node = pattern_map.at(convert_4).get_node_shared_ptr(); + auto convert_4_in_const = + std::dynamic_pointer_cast(convert_4_node->get_input_node_shared_ptr(0)); + auto add_node = pattern_map.at(add).get_node_shared_ptr(); + OutputVector outputs_5(convert_3_node->get_output_size()); + if (!convert_3_node->constant_fold(outputs_5, shifted_const->outputs())) { + return false; + } + OutputVector outputs_6(convert_4_node->get_output_size()); + if (!convert_4_node->constant_fold(outputs_6, convert_4_in_const->outputs())) { + return false; + } + outputs_7.resize(add_node->get_output_size()); + OutputVector add_inputs(2); + add_inputs[0] = outputs_5[0]; + add_inputs[1] = outputs_6[0]; + if (!add_node->constant_fold(outputs_7, add_inputs)) { + return false; } } - 
copy_runtime_info_and_name(weights_u32, {new_const}, {weights_u32, bitwise_and}); + auto convert_2_node = pattern_map.at(convert_2).get_node_shared_ptr(); + auto convert_2_in_const = std::dynamic_pointer_cast(convert_2_node->get_input_node_shared_ptr(0)); + + OutputVector outputs_8(convert_2_node->get_output_size()); + if (!convert_2_node->constant_fold(outputs_8, convert_2_in_const->outputs())) { + return false; + } + + OutputVector outputs_9(bitwise_and->get_output_size()); + + const int8_t* and_in0 = + std::dynamic_pointer_cast(outputs_7[0].get_node_shared_ptr())->get_data_ptr(); + const int8_t* and_in1 = + std::dynamic_pointer_cast(outputs_8[0].get_node_shared_ptr())->get_data_ptr(); + auto masked_const = std::make_shared(element::i8, outputs_7[0].get_shape()); + auto masked_dst = const_cast(reinterpret_cast(masked_const->get_data_ptr())); + if (!masked_dst) + return false; + + size_t and_in0_shape_size = shape_size(outputs_7[0].get_shape()); + // TODO: Bitwise and operation below might need to be + // optimized to reduce FIL. 
+ int8_t mask = and_in1[0]; + for (size_t k = 0; k < and_in0_shape_size; ++k) { + masked_dst[k] = (and_in0[k] & mask); + } + + auto convert_to_u4 = std::make_shared(masked_const, element::u4); + OutputVector outputs_10(convert_to_u4->get_output_size()); + if (!convert_to_u4->constant_fold(outputs_10, masked_const->outputs())) { + return false; + } - auto new_convert = std::make_shared(new_const, bitwise_and->get_output_element_type(0)); - copy_runtime_info_and_name(bitwise_and, {new_convert}, {input_node}); + auto new_convert = + std::make_shared(outputs_10[0].get_node_shared_ptr(), bitwise_and->get_output_element_type(0)); + copy_runtime_info_and_name(bitwise_and, {new_convert}, {unsqueeze_1_node}); replace_node(bitwise_and, new_convert); return true; }; diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp index 675a293269002b..797fa531c43b60 100644 --- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -6,9 +6,12 @@ #include "openvino/core/rt_info.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" #include "openvino/op/reshape.hpp" +#include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" #include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "utils.hpp" #include "utils_quantize.hpp" @@ -53,6 +56,7 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) { auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr(); auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr(); auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr(); + auto pattern_root = reshape2; if (constant->get_element_type() != element::u4) return false; @@ -76,9 +80,26 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) { auto get_number = get_u4; auto constant_dtype = element::u4; + 
NodeVector copy_from{std::move(constant), std::move(reshape1), std::move(transpose), reshape2}; if (is_symmetrical) { get_number = get_i4; constant_dtype = element::i4; + // find pattern Convert(W, i8) -> Subtract(8) + auto reshape_targets = reshape2->output(0).get_target_inputs(); + if (reshape_targets.size() != 1) + return false; + auto convert = reshape_targets.begin()->get_node()->shared_from_this(); + if (!std::dynamic_pointer_cast(convert)) + return false; + auto convert_targets = convert->output(0).get_target_inputs(); + if (convert_targets.size() != 1) + return false; + auto subtract = convert_targets.begin()->get_node()->shared_from_this(); + if (!std::dynamic_pointer_cast(subtract)) + return false; + pattern_root = subtract; + copy_from.push_back(std::move(convert)); + copy_from.push_back(subtract); } auto new_const = std::make_shared(constant_dtype, destination_shape); auto dst = const_cast( // const_cast? @@ -96,8 +117,81 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) { } } - copy_runtime_info({std::move(constant), std::move(reshape1), std::move(transpose), reshape2}, new_const); - replace_node(reshape2, new_const); + copy_runtime_info(copy_from, new_const); + replace_node(pattern_root, new_const); + + return true; + }); +}; + +U4ConvertReshape::U4ConvertReshape() { + const auto& m_constant = wrap_type(type_matches(element::u4)); + const auto& m_convert = wrap_type({m_constant}); + + const auto& m_constant_1 = wrap_type(); + const auto& m_convert_1 = wrap_type({m_constant_1}); + const auto& m_constant_8 = wrap_type(); + const auto& m_convert_8 = wrap_type({m_constant_8}); + const auto& m_multiply = wrap_type({m_convert_8, m_convert_1}); + + const auto& m_converted_constant_8 = + std::make_shared(ov::OutputVector{m_multiply, m_convert_8}); + const auto& m_subtract = wrap_type({m_convert, m_converted_constant_8}); + const auto& m_converted_constant = + std::make_shared(ov::OutputVector{m_subtract, m_constant}); + const auto& m_reshape = 
wrap_type({m_converted_constant, any_input()}); + + register_matcher( + std::make_shared(m_reshape, "ov::frontend::pytorch::pass::U4ConvertReshape"), + [=](Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto u4_const = + std::dynamic_pointer_cast(pattern_to_output[m_constant].get_node_shared_ptr()); + if (!u4_const) + return false; + + if (u4_const->get_element_type() != element::u4) + return false; + + auto reshape = pattern_to_output[m_reshape].get_node_shared_ptr(); + auto dst_shape = reshape->get_output_shape(0); + + std::shared_ptr new_const; + if (pattern_to_output.count(m_constant_8)) { + auto constant_8 = std::dynamic_pointer_cast( + pattern_to_output[m_constant_8].get_node_shared_ptr()); + if (ov::shape_size(constant_8->get_output_shape(0)) != 1 || + constant_8->get_output_element_type(0).is_real() || constant_8->cast_vector()[0] != 8) + return false; + + if (pattern_to_output.count(m_constant_1)) { + auto constant_1 = std::dynamic_pointer_cast( + pattern_to_output[m_constant_1].get_node_shared_ptr()); + if (ov::shape_size(constant_1->get_output_shape(0)) != 1 || + constant_1->get_output_element_type(0).is_real() || constant_1->cast_vector()[0] != 1) + return false; + } + + new_const = std::make_shared(element::i4, dst_shape); + auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr())); + + auto src = u4_const->get_data_ptr(); + auto num_elements = ov::shape_size(u4_const->get_output_shape(0)); + + for (size_t i = 0; i < num_elements / 2; ++i) { + // subtracting 8 from 2 int4 elements + dst[i] = src[i] ^ 0b10001000; + } + } else { + new_const = std::make_shared(*u4_const, dst_shape); + } + + NodeVector pattern_nodes; + for (auto const& iout : pattern_to_output) + pattern_nodes.push_back(std::move(iout.first)); + + copy_runtime_info(pattern_nodes, new_const); + replace_node(reshape, new_const); return true; }); diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp 
b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp index 6ab65a5d1c3838..99742ff148813a 100644 --- a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp @@ -18,6 +18,12 @@ class U4BlockRepack : public ov::pass::MatcherPass { U4BlockRepack(bool is_symmetrical = false); }; +class U4ConvertReshape : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::U4ConvertReshape"); + U4ConvertReshape(); +}; + } // namespace pass } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index 852de6e90fa25b..752b9accb71d01 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -17,6 +17,7 @@ #include "openvino/op/gather.hpp" #include "openvino/op/gather_nd.hpp" #include "openvino/op/mod.hpp" +#include "openvino/op/multiply.hpp" #include "openvino/op/non_zero.hpp" #include "openvino/op/range.hpp" #include "openvino/op/reduce_prod.hpp" @@ -24,6 +25,7 @@ #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/slice.hpp" +#include "openvino/op/split.hpp" #include "openvino/op/squeeze.hpp" #include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" @@ -664,6 +666,197 @@ Output masked_select(const NodeContext& context, const Output& data, return context.mark_node(std::make_shared(data, masked_id)); } +Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis) { + // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of + // input tensor. 
The last dimension is the product of the rest of input tensor dimensions: + // [d_{axis}, ..., d_n] + Output output_shape; + if (axis == 0) { + output_shape = v0::Constant::create(element::i32, Shape{2}, {1, -1}); + } else if (axis == 1) { + output_shape = v0::Constant::create(element::i32, Shape{2}, {0, -1}); + } else { + const auto value_shape = rg.make(value, element::i32); + const auto value_rank = rg.make(value_shape, element::i32); + const auto axis_node = v0::Constant::create(element::i32, Shape{1}, {axis}); + auto start = v0::Constant::create(element::i32, Shape{1}, {0}); + auto step = v0::Constant::create(element::i32, Shape{1}, {1}); + const auto first_part_dims = rg.make(value_shape, start, axis_node, step); + auto zero = v0::Constant::create(element::i32, {}, {0}); + auto first_part_dims_length = rg.make(first_part_dims, zero, true); + + auto remaining_part_length = v0::Constant::create(element::i32, {1}, {-1}); + + output_shape = rg.make(OutputVector{first_part_dims_length, remaining_part_length}, 0); + } + return rg.make(value, output_shape, true); +} + +bool index_tensor_on_list(ov::pass::NodeRegistry& rg, + const Output& data, + const ov::OutputVector& indices, + const ov::Rank& rank, + Output& new_output, + bool& use_input_as_output) { + // Multiple tensors as indices. Each tensor could either be + // 1. prim::Constant() + // representing ":" in python indexing. E.g. tensor[:, :] + // 2. prim::Constant[value=...] or tensor output + // representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. + // For more info on advanced indexing, + // check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + + // Consider a general case of + // t: [x_1, y_1, y_2, ..., x_m, ..., y_n] + // where t is a tensor of rank m+n, {x_i} are axes where tensor index is provided, and {y_i} are axes for + // ":". 
Same results can be achieved through transposing t into + // t: [x_1, x_2, ..., x_m, y_1, y_2, ..., y_n] + // and use gather + // t: [x_1 * x_2 * ... * x_m, y_1 * y_2 * ... * y_n] + // tensor index = \sum_{i=1}^m (ind_i * \prod_{j=i+1}^m (x_j)) + // After gather, reshape and transpose back. + std::vector advanced_ids; + std::vector is_masked_bool; + OutputVector masked_indicies; + // for case when index is bool e.g. x[x>0], replace index with non_zero + for (size_t i = 0; i < indices.size(); ++i) { + // skip dimensions where index is None + bool is_none = false; + if (!indices[i].get_node_shared_ptr()) { + is_none = true; + } + if (auto const_input = cast_fw_node(indices[i].get_node_shared_ptr(), "prim::Constant")) { + const auto& attrs = const_input->get_attrs(); + if (attrs.find("none_value") != attrs.end()) { + is_none = true; + } + } + if (is_none) { + masked_indicies.push_back(indices[i]); + is_masked_bool.push_back(false); + continue; + } + auto id_dtype = indices[i].get_element_type(); + if (id_dtype == element::boolean || id_dtype == element::u8) { + auto idx = rg.make(indices[i], element::u8); + auto nonzero = rg.make(idx, element::i32); + auto input_order = rg.make(element::i32, Shape{2}, std::vector{1, 0}); + auto masked_id = rg.make(nonzero, input_order); + masked_indicies.push_back(masked_id); + is_masked_bool.push_back(true); + } else { + masked_indicies.push_back(indices[i]); + is_masked_bool.push_back(false); + } + advanced_ids.push_back(i); + } + + // all indicies prim::Constant(None), return input as is + if (advanced_ids.size() == 0) { + new_output = data; + use_input_as_output = true; + return true; + } + // perform gather for single element case + if (advanced_ids.size() == 1) { + auto index = masked_indicies[advanced_ids[0]]; + if (is_masked_bool[advanced_ids[0]]) { + auto gather = rg.make(data, index); + new_output = gather->output(0); + use_input_as_output = false; + return true; + } + index = rg.make(index, element::i32); + auto dim = 
rg.make(element::i32, Shape{}, static_cast(advanced_ids[0])); + auto gather = rg.make(data, index, dim); + new_output = gather->output(0); + use_input_as_output = false; + return true; + } + // index transformation supports only tensors with static rank + if (rank.is_dynamic()) { + return false; + } + auto adv_idx_count = advanced_ids.size(); + auto input_shape = rg.make(data, element::i32); + auto zero = rg.make(element::i32, Shape{}, 0); + auto input_dims = rg.make(input_shape, zero, rank.get_length()); + std::vector non_used_dims; + for (auto i = 0; i < rank.get_length(); i++) { + if (std::find(advanced_ids.begin(), advanced_ids.end(), i) == advanced_ids.end()) { + non_used_dims.push_back(i); + } + } + std::vector permutation_dims; + permutation_dims.insert(permutation_dims.end(), advanced_ids.begin(), advanced_ids.end()); + permutation_dims.insert(permutation_dims.end(), non_used_dims.begin(), non_used_dims.end()); + auto transpose_dims = rg.make(element::i32, Shape{permutation_dims.size()}, permutation_dims); + auto transposed_input = rg.make(data, transpose_dims); + auto flatten_input = flatten(rg, transposed_input, adv_idx_count); + auto cum_adv_index = masked_indicies[advanced_ids[adv_idx_count - 1]]; + cum_adv_index = rg.make(cum_adv_index, element::i32); + auto multiplier = input_dims->output(advanced_ids[adv_idx_count - 1]); + for (int i = static_cast(adv_idx_count) - 2; i > -1; i--) { + auto input_id = advanced_ids[i]; + auto m_idx = rg.make(masked_indicies[input_id], element::i32); + auto adv_index = rg.make(m_idx, multiplier); + cum_adv_index = rg.make(cum_adv_index, adv_index); + multiplier = rg.make(multiplier, input_dims->output(input_id)); + } + std::shared_ptr gather = rg.make(flatten_input, cum_adv_index, zero); + OutputVector concat_dims; + // check if all advanced indices are consecutive. 
+ std::vector consequence_dims; + auto cum_adv_index_shape_tensor = rg.make(cum_adv_index, element::i32); + for (size_t i = advanced_ids[0]; i <= advanced_ids[advanced_ids.size() - 1]; i++) { + consequence_dims.push_back(i); + } + // unfold regular index axes + if (advanced_ids == consequence_dims) { + OutputVector folded_adv_idx_shape_vector; + auto minus_one = rg.make(element::i32, Shape{1}, -1); + folded_adv_idx_shape_vector.push_back(minus_one); + for (auto i : non_used_dims) { + folded_adv_idx_shape_vector.push_back(input_dims->output(i)); + } + auto folded_adv_idx_shape = rg.make(folded_adv_idx_shape_vector, 0); + gather = rg.make(gather, folded_adv_idx_shape, false); + std::vector adv_idx_permute; + for (size_t i = 1; i < advanced_ids[0] + 1; i++) { + adv_idx_permute.push_back(i); + } + adv_idx_permute.push_back(0); + for (size_t i = advanced_ids[0] + 1; i < (rank.get_length() - adv_idx_count + 1); i++) { + adv_idx_permute.push_back(i); + } + // Transpose folded advanced indexed axis to its original location. 
+ auto permute_indicies = rg.make(element::i32, Shape{adv_idx_permute.size()}, adv_idx_permute); + gather = rg.make(gather, permute_indicies); + // unfold advanced index axes + for (size_t i = 0; i < advanced_ids[0]; i++) { + concat_dims.push_back(input_dims->output(i)); + } + concat_dims.push_back(cum_adv_index_shape_tensor); + for (auto i : non_used_dims) { + if (i < advanced_ids[0]) { + continue; + } + concat_dims.push_back(input_dims->output(i)); + } + + } else { + concat_dims.push_back(cum_adv_index_shape_tensor); + for (auto i : non_used_dims) { + concat_dims.push_back(input_dims->output(i)); + } + } + auto final_shape = rg.make(concat_dims, 0); + gather = rg.make(gather, final_shape, false); + new_output = gather->output(0); + use_input_as_output = false; + return true; +} + } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index f4104a83ae3252..9346b9e18b94a3 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -129,6 +129,15 @@ Output concat_list_from_inputs(const NodeContext& context, size_t begin, s Output masked_select(const NodeContext& context, const Output& data, const Output& mask); +Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis); + +bool index_tensor_on_list(ov::pass::NodeRegistry& rg, + const Output& data, + const ov::OutputVector& indices, + const ov::Rank& rank, + Output& new_output, + bool& use_input_as_output); + namespace op { template OutputVector inplace_op(const NodeContext& context) { diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index e48c61314f4c0d..ad8f91bcda25a2 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -237,12 +237,15 @@ std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64 return nullptr; } - auto bitwise_shift = 
cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift"); + auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), + {"aten::bitwise_right_shift", "aten.bitwise_right_shift.Tensor_Scalar"}); if (!bitwise_shift) return nullptr; auto weights_u8 = std::dynamic_pointer_cast(bitwise_and->get_input_node_shared_ptr(0)); - if (weights_u8 != std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0))) + auto weights_u8_bitwise_shift = + std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0)); + if (weights_u8->get_data_ptr() != weights_u8_bitwise_shift->get_data_ptr()) return nullptr; if (weights_u8->get_output_element_type(0) != element::u8) diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index 31f871aca4fdef..88a8e58c3cfd31 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -419,7 +419,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | ExperimentalUnbatchDataset | NO | | | ExperimentalUniqueDataset | NO | | | Expint | NO | | -| Expm1 | NO | | +| Expm1 | YES | | | ExtractGlimpse | NO | | | ExtractGlimpseV2 | NO | | | ExtractImagePatches | YES | | diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp index f2dba04b49dca7..c23890b90dcab4 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp @@ -8,6 +8,7 @@ #include "exception.hpp" #include "openvino/core/any.hpp" #include "openvino/frontend/node_context.hpp" +#include "openvino/frontend/tensorflow/visibility.hpp" #include "variable.hpp" #include "variables_map.hpp" @@ -18,7 +19,7 @@ class TranslateSession; /// Keep necessary data for a single node in the original 
FW graph to facilitate /// conversion process in the rules code. -class NodeContext : public ov::frontend::NodeContext { +class TENSORFLOW_API NodeContext : public ov::frontend::NodeContext { public: using Ptr = std::shared_ptr; NodeContext(const std::shared_ptr& decoder, diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index c2bf460b02b19a..26b665c275bb48 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -252,6 +252,7 @@ const std::map get_supported_ops() { {"EmptyTensorList", CreatorFunction(translate_empty_tensor_list_op)}, {"EnsureShape", CreatorFunction(translate_identity_op)}, {"ExpandDims", CreatorFunction(translate_expand_dims_op)}, + {"Expm1", CreatorFunction(translate_expm1_op)}, {"ExtractImagePatches", CreatorFunction(translate_extract_image_patches_op)}, {"FakeQuantWithMinMaxVars", CreatorFunction(translate_fake_quant_op)}, {"FakeQuantWithMinMaxVarsPerChannel", CreatorFunction(translate_fake_quant_op)}, diff --git a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp index 34b2a82152ccfc..cbdc506671aa67 100644 --- a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp +++ b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp @@ -235,6 +235,9 @@ bool pass::SwitchMergeResolver::run_on_model(const shared_ptr& m) { auto else_body = make_shared(else_results, else_params); auto if_op = make_shared(cond); + // in case TensorFlow models, we can deduce predicate shape that must be a scalar + if_op->get_rt_info()["tf_switch_merge_if"] = true; + set_cf_marker(if_cf_marker, if_op); if_op->set_then_body(then_body); if_op->set_else_body(else_body); diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 1fa3386fce47af..5bbf056af1dbe4 100644 --- 
a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -74,6 +74,7 @@ OP_CONVERTER(translate_mul_op); OP_CONVERTER(translate_dynamic_partition_op); OP_CONVERTER(translate_einsum_op); OP_CONVERTER(translate_elu_op); +OP_CONVERTER(translate_expm1_op); OP_CONVERTER(translate_expand_dims_op); OP_CONVERTER(translate_extract_image_patches_op); OP_CONVERTER(translate_fake_quant_op); diff --git a/src/frontends/tensorflow_common/src/op/depthwise_conv_2d.cpp b/src/frontends/tensorflow_common/src/op/depthwise_conv_2d.cpp index 09eb606f41c353..c2fdee9c0cd319 100644 --- a/src/frontends/tensorflow_common/src/op/depthwise_conv_2d.cpp +++ b/src/frontends/tensorflow_common/src/op/depthwise_conv_2d.cpp @@ -43,10 +43,6 @@ OutputVector translate_depthwise_conv_2d_native_op(const NodeContext& node) { Strides dilations(2); convert_nhwc_to_hw(is_nhwc, tf_strides, strides); convert_nhwc_to_hw(is_nhwc, tf_dilations, dilations); - - Shape ng_image_shape(2); - Shape ng_kernel_shape(2); - convert_nhwc_to_nchw(is_nhwc, input, ov::Rank(4)); // prepare filter to have a number of groups equal to CIN diff --git a/src/frontends/tensorflow_common/src/op/expm1.cpp b/src/frontends/tensorflow_common/src/op/expm1.cpp new file mode 100644 index 00000000000000..21759bd3cbdcb8 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/expm1.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/exp.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_expm1_op(const NodeContext& node) { + default_op_checks(node, 1, {"Expm1"}); + auto input = node.get_input(0); + auto const_one = create_same_type_const_scalar(input, 1); + auto exp = make_shared(input); + auto 
res = make_shared(exp, const_one); + set_node_name(node.get_name(), res); + return {res}; +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp b/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp index 5cbf4c92b8a35c..e082be4943977c 100644 --- a/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp +++ b/src/frontends/tensorflow_common/src/op/tensor_list_operations.cpp @@ -199,8 +199,13 @@ OutputVector translate_tensor_list_length_op(const NodeContext& node) { auto tensor_list_shape = make_shared(input_handle, element::i32); auto list_length = make_shared(tensor_list_shape, zero_const, one_const, one_const); - set_node_name(node.get_name(), list_length); - return {list_length}; + // output of TensorListLength must be a scalar + // after Slice operation it is a 1D tensor with one element + auto scalar_shape = make_shared(element::i32, Shape{0}, std::vector{}); + auto list_length_scalar = make_shared(list_length, scalar_shape, false); + + set_node_name(node.get_name(), list_length_scalar); + return {list_length_scalar}; } OutputVector translate_tensor_list_concat_v2_op(const NodeContext& node) { diff --git a/src/frontends/tensorflow_common/src/op/tobool.cpp b/src/frontends/tensorflow_common/src/op/tobool.cpp index 9f1082d5df87da..9025a35f549799 100644 --- a/src/frontends/tensorflow_common/src/op/tobool.cpp +++ b/src/frontends/tensorflow_common/src/op/tobool.cpp @@ -9,6 +9,8 @@ #include "openvino/op/logical_and.hpp" #include "openvino/op/logical_or.hpp" #include "openvino/op/not_equal.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_logical_and.hpp" #include "openvino/op/reduce_prod.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" @@ -22,47 +24,49 @@ namespace tensorflow { namespace op { OutputVector translate_tobool_op(const NodeContext& node) { // (rank(x) == 0 && x != 0) || 
(rank > 0 && ReduceProd(ShapeOf(x))) > 0 - default_op_checks(node, 1, {"ToBool"}); auto x = node.get_input(0); // prepare auxiliary zero and zero constants of the same type as the inputs - auto zero = create_same_type_const_scalar(x, 0); - auto zero_2 = make_shared(element::i32, Shape{}, 0); - auto true_const = make_shared(element::boolean, Shape{}, true); + auto zero_x = create_same_type_const_scalar(x, 0); + auto zero_i64 = make_shared(element::i64, Shape{}, 0); + auto one_i64 = make_shared(element::i64, Shape{}, 1); auto false_const = make_shared(element::boolean, Shape{}, false); // compute a mask to get rank(x) == 0 - auto x_rank = compute_subgraph_scalar_rank(x, element::i32); + auto x_rank = compute_subgraph_scalar_rank(x, element::i64, true); + // 1. try to evaluate if it satisfy non-zero scalar input // compute rank(x) == 0 - auto is_zero = make_shared(x_rank, zero_2); - + auto is_rank_zero = make_shared(x_rank, zero_i64); // compute mask to get x != 0 - auto is_not_zero = make_shared(x, zero); - + auto is_x_not_zero = make_shared(x, zero_x)->output(0); // compute (rank(x) == 0 && x != 0) - auto logical_and = make_shared(is_zero, is_not_zero); - // compute rank(x) > 0 - auto greater_than_zero = make_shared(x_rank, zero_2); + auto scalar_cond = make_shared(is_rank_zero, is_x_not_zero)->output(0); + // generate reduce_axes + auto reduce_axes = make_shared(zero_i64, x_rank, one_i64); + scalar_cond = make_shared(scalar_cond, reduce_axes, false); + // correct result for empty tensor, for which scalar_cond is still equal to True + scalar_cond = make_shared(is_rank_zero, scalar_cond, false_const); + // 2. 
try to evaluate if it is non-scalar input tensor and not empty tensor + // compute rank(x) > 0 + auto rank_greater_than_zero = make_shared(x_rank, zero_i64); // compute ShapeOf(x) - auto cond_shape = make_shared(x, element::i32); + auto x_shape = make_shared(x, element::i64); // compute ReduceProd(ShapeOf(x))) and axis - auto axis = make_shared(element::i32, Shape{}, 0); - auto reduce_prod = make_shared(cond_shape, axis); - + auto reduce_axis = make_shared(element::i32, Shape{}, 0); + auto num_elems = make_shared(x_shape, reduce_axis, false); // compute ReduceProd(ShapeOf(x))) > 0 - auto greater_than__zero_2 = make_shared(reduce_prod, zero_2); + auto num_elems_greater_than_zero = make_shared(num_elems, zero_i64); // compute (rank > 0 && ReduceProd(ShapeOf(x))) > 0 - auto logical_and_2 = make_shared(greater_than_zero, greater_than__zero_2); - - auto logical_or = make_shared(logical_and, logical_and_2); + // it will be a scalar + auto non_scalar_tensor_not_empty = make_shared(rank_greater_than_zero, num_elems_greater_than_zero); - auto tobool = make_shared(logical_or, true_const, false_const); - set_node_name(node.get_name(), tobool); - return tobool->outputs(); + auto to_bool = make_shared(scalar_cond, non_scalar_tensor_not_empty); + set_node_name(node.get_name(), to_bool); + return to_bool->outputs(); } } // namespace op } // namespace tensorflow } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/top_k.cpp b/src/frontends/tensorflow_common/src/op/top_k.cpp index c916fe5b746327..4d3bfdcf64bcaf 100644 --- a/src/frontends/tensorflow_common/src/op/top_k.cpp +++ b/src/frontends/tensorflow_common/src/op/top_k.cpp @@ -3,6 +3,7 @@ // #include "common_op_table.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/topk.hpp" using namespace std; @@ -14,22 +15,35 @@ namespace tensorflow { namespace op { NamedOutputVector translate_top_k_base_op(const NodeContext& node, const 
ov::Output& k_input, - int min_input_size) { + int min_input_size, + const ov::element::Type& index_type = ov::element::i32) { default_op_checks(node, min_input_size, {"TopK", "TopKV2", "TOPK_V2"}); auto input = node.get_input(0); // retrieve k attribute bool sorted = node.get_attribute("sorted", true); + auto topk_index_type = index_type; + if (index_type == ov::element::i16) { + // v11::TopK supports only int32 and int64 output index type + topk_index_type = ov::element::i32; + } auto top_k = make_shared(input, k_input, -1, ov::op::v11::TopK::Mode::MAX, sorted ? v11::TopK::SortType::SORT_VALUES : v11::TopK::SortType::SORT_INDICES, - ov::element::i32, + topk_index_type, true); + auto values = top_k->output(0); + auto indices = top_k->output(1); + if (index_type != topk_index_type) { + // satisfy the requested output index type + indices = make_shared(indices, index_type)->output(0); + } set_node_name(node.get_name(), top_k); - return {{"values", top_k->output(0)}, {"indices", top_k->output(1)}}; + return {{"values", values}, {"indices", indices}}; } + NamedOutputVector translate_top_k_op(const NodeContext& node) { // retrieve k attribute auto k = node.get_attribute("k"); @@ -39,8 +53,9 @@ NamedOutputVector translate_top_k_op(const NodeContext& node) { NamedOutputVector translate_top_k_v2_op(const NodeContext& node) { default_op_checks(node, 2, {"TopKV2", "TOPK_V2"}); + auto index_type = node.get_attribute("index_type", ov::element::i32); auto k_input = node.get_input(1); - return translate_top_k_base_op(node, k_input, 1); + return translate_top_k_base_op(node, k_input, 1, index_type); } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp b/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp index 7c5c0a196aa018..d3ca3156ed1347 100644 --- a/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp +++ b/src/frontends/tensorflow_common/src/op/unique_with_counts.cpp @@ -6,6 +6,7 @@ 
#include "openvino/op/unique.hpp" using namespace std; +using namespace ov; using namespace ov::op; namespace ov { @@ -19,8 +20,9 @@ OutputVector translate_unique_with_counts_op(const NodeContext& node) { // get input 'x' from node and node name auto x = node.get_input(0); auto node_name = node.get_name(); + auto out_idx = node.get_attribute("out_idx", element::i32); - auto unique = make_shared(x, false, ov::element::i32, ov::element::i32); + auto unique = make_shared(x, false, out_idx, out_idx); set_node_name(node_name, unique); // story 'y', 'idx', and 'count' outputs from Unique in separate variables @@ -34,4 +36,4 @@ OutputVector translate_unique_with_counts_op(const NodeContext& node) { } // namespace op } // namespace tensorflow } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp index b3415cf288c4be..a2cafe16e075fb 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -struct TensorMetaInfo { +struct TENSORFLOW_LITE_API TensorMetaInfo { std::shared_ptr m_quantization_info; std::shared_ptr m_sparsity_info; ov::PartialShape m_partial_shape; @@ -23,11 +23,11 @@ struct TensorMetaInfo { std::string m_tensor_name; }; -class DecoderBase : public ov::frontend::DecoderBase {}; +class TENSORFLOW_LITE_API DecoderBase : public ov::frontend::DecoderBase {}; // DecoderBaseOperation corresponds to operation node to retrieve its attributes and information about input and output // tensors -class DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase { +class TENSORFLOW_LITE_API DecoderBaseOperation : public 
ov::frontend::tensorflow_lite::DecoderBase { public: /// \brief Get input tensor name by index /// Operation nodes are connected between each other by tensors. @@ -71,7 +71,7 @@ class DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase { // DecoderBaseTensor corresponds to tensor node to retrieve information about type, shapem quantization and sparsity // information -class DecoderBaseTensor : public ov::frontend::tensorflow_lite::DecoderBase { +class TENSORFLOW_LITE_API DecoderBaseTensor : public ov::frontend::tensorflow_lite::DecoderBase { public: /// \brief Get tensor info virtual TensorMetaInfo get_tensor_info() const = 0; diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp index 8ec2bc3f05c358..2084147c9ab284 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp @@ -24,7 +24,7 @@ namespace tensorflow_lite { /// DecoderBaseOperation (for op 1), ..., DecoderBaseOperation (for op k), /// where n - number of inputs in the model, m - number of outputs in the model k - number of operation nodes. /// NOTE: constants are ignored and no decoder object is returned for constant. 
-class GraphIterator : ::ov::RuntimeAttribute { +class TENSORFLOW_LITE_API GraphIterator : ::ov::RuntimeAttribute { public: using Ptr = std::shared_ptr; diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp index bd0f1e28283a27..66977db1caa5d4 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp @@ -15,7 +15,7 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -class QuantizationInfo : public ov::RuntimeAttribute { +class TENSORFLOW_LITE_API QuantizationInfo : public ov::RuntimeAttribute { public: OPENVINO_RTTI("QuantizationInfo"); QuantizationInfo() = default; diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp index 596cb651763d57..c1ab8d4fd04941 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp @@ -16,7 +16,7 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -class SparsityInfo : public ov::RuntimeAttribute { +class TENSORFLOW_LITE_API SparsityInfo : public ov::RuntimeAttribute { public: struct SparsityDataDesc { uint8_t segments_type; diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 621c0074fc9d1e..5674c75dd546d7 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -580,6 +580,12 @@ static constexpr Property dynamic_quantization */ static constexpr Property 
kv_cache_precision{"KV_CACHE_PRECISION"}; +/** + * @brief This property scales down activations to prevent overflows when inference precision is f16. + * @ingroup ov_runtime_cpp_prop_api + */ +static constexpr Property activations_scale_factor{"ACTIVATIONS_SCALE_FACTOR"}; + } // namespace hint /** @@ -1345,4 +1351,10 @@ static constexpr Property affinity{"AFFINITY"}; */ static constexpr Property, PropertyMutability::RO> execution_devices{"EXECUTION_DEVICES"}; +/** + * @brief Path to the file with model's weights. + * + * @note This property is used for weightless caching. Only used when ov::CacheMode Property is set to "OPTIMIZE_SIZE". + */ +static constexpr Property weights_path{"WEIGHTS_PATH"}; } // namespace ov diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 32b43f346e9e44..244d27b5eebb67 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1447,6 +1447,18 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( ov::AnyMap update_config = config; update_config[ov::loaded_from_cache.name()] = true; + + if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) { + std::string weights_path = cacheContent.modelPath; + auto pos = weights_path.rfind('.'); + if (pos != weights_path.npos && weights_path.substr(pos) == ".xml") { + weights_path = weights_path.substr(0, pos); + weights_path += ".bin"; + } + if (ov::util::file_exists(weights_path)) { + update_config[ov::weights_path.name()] = weights_path; + } + } compiled_model = context ? 
plugin.import_model(networkStream, context, update_config) : plugin.import_model(networkStream, update_config); }); diff --git a/src/inference/src/os/cpu_map_info.hpp b/src/inference/src/os/cpu_map_info.hpp index 497b25c3b68153..2cc6cef768d68d 100644 --- a/src/inference/src/os/cpu_map_info.hpp +++ b/src/inference/src/os/cpu_map_info.hpp @@ -53,6 +53,39 @@ class CPU { std::map _numaid_mapping_table; std::mutex _cpu_mutex; int _socket_idx = 0; + +private: + /** + * @brief Sort proc_type_table by CPU ID on which application is running. The numa node containing this CPU ID + * will move to first row. + * @param[in] _processor_id CPU ID on which application is running. + * @param[in] _proc_type_table summary table of number of processors per type + * @param[in] _cpu_mapping_table CPU mapping table for each processor + * @return + */ + void sort_table_by_cpu_id(const int _processor_id, + std::vector>& _proc_type_table, + const std::vector>& _cpu_mapping_table) { + int current_numa_node = 0; + int current_socket = 0; + + for (auto& row : _cpu_mapping_table) { + if (_processor_id == row[CPU_MAP_PROCESSOR_ID]) { + current_numa_node = row[CPU_MAP_NUMA_NODE_ID]; + current_socket = row[CPU_MAP_SOCKET_ID]; + break; + } + } + for (size_t i = 1; i < _proc_type_table.size(); i++) { + if ((current_numa_node == _proc_type_table[i][PROC_NUMA_NODE_ID]) && + (current_socket == _proc_type_table[i][PROC_SOCKET_ID])) { + std::rotate(_proc_type_table.begin() + 1, _proc_type_table.begin() + i, _proc_type_table.end()); + break; + } + } + }; + + friend class LinuxSortProcTableTests; }; CPU& cpu_info(); diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index e30bcbbe8bc55e..f8bd16173b8fce 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -282,6 +282,11 @@ CPU::CPU() { OPENVINO_THROW("CPU affinity check failed. 
No CPU is eligible to run inference."); }; + if (_proc_type_table.size() > 1) { + int cur_processor_id = sched_getcpu(); + sort_table_by_cpu_id(cur_processor_id, _proc_type_table, _cpu_mapping_table); + } + _org_proc_type_table = _proc_type_table; cpu_debug(); diff --git a/src/inference/src/os/win/win_system_conf.cpp b/src/inference/src/os/win/win_system_conf.cpp index f0ea4f181896ac..a4129a80b599ba 100644 --- a/src/inference/src/os/win/win_system_conf.cpp +++ b/src/inference/src/os/win/win_system_conf.cpp @@ -52,6 +52,11 @@ CPU::CPU() { } } + if (_proc_type_table.size() > 1) { + int cur_processor_id = GetCurrentProcessorNumber(); + sort_table_by_cpu_id(cur_processor_id, _proc_type_table, _cpu_mapping_table); + } + cpu_debug(); } diff --git a/src/inference/tests/unit/cpu_map_parser/update_proc_table.cpp b/src/inference/tests/unit/cpu_map_parser/update_proc_table.cpp new file mode 100644 index 00000000000000..fe255b85f47a33 --- /dev/null +++ b/src/inference/tests/unit/cpu_map_parser/update_proc_table.cpp @@ -0,0 +1,173 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_common.hpp" +#include "openvino/runtime/system_conf.hpp" +#include "os/cpu_map_info.hpp" + +using namespace testing; + +namespace ov { + +#ifdef __linux__ + +struct LinuxSortProcTableTestCase { + int current_processor_id; + std::vector> _proc_type_table_input; + std::vector> _cpu_mapping_table; + std::vector> _proc_type_table_output; +}; + +class LinuxSortProcTableTests : public ov::test::TestsCommon, + public testing::WithParamInterface> { +public: + void SetUp() override { + const auto& test_data = std::get<0>(GetParam()); + + CPU& cpu = cpu_info(); + std::vector> test_proc_type_table = test_data._proc_type_table_input; + + cpu.sort_table_by_cpu_id(test_data.current_processor_id, test_proc_type_table, test_data._cpu_mapping_table); + + ASSERT_EQ(test_proc_type_table, test_data._proc_type_table_output); + } +}; 
+ +LinuxSortProcTableTestCase proc_table_2sockets_24cores_hyperthreading_1 = { + 2, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 2, 1, 12, HYPER_THREADING_PROC, 12, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 2, 1, 13, HYPER_THREADING_PROC, 13, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 2, 1, 14, HYPER_THREADING_PROC, 14, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 2, 1, 15, HYPER_THREADING_PROC, 15, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 2, 1, 16, HYPER_THREADING_PROC, 16, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 2, 1, 17, HYPER_THREADING_PROC, 17, -1}, + {12, 1, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 3, 1, 18, HYPER_THREADING_PROC, 18, -1}, + {14, 1, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 3, 1, 19, HYPER_THREADING_PROC, 19, -1}, + {16, 1, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 3, 1, 20, HYPER_THREADING_PROC, 20, -1}, + {18, 1, 0, 9, HYPER_THREADING_PROC, 9, -1}, {19, 3, 1, 21, HYPER_THREADING_PROC, 21, -1}, + {20, 1, 0, 10, HYPER_THREADING_PROC, 10, -1}, {21, 3, 1, 22, HYPER_THREADING_PROC, 22, -1}, + {22, 1, 0, 11, HYPER_THREADING_PROC, 11, -1}, {23, 3, 1, 23, HYPER_THREADING_PROC, 23, -1}, + {24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 2, 1, 12, MAIN_CORE_PROC, 12, -1}, + {26, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {27, 2, 1, 13, MAIN_CORE_PROC, 13, -1}, + {28, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {29, 2, 1, 14, MAIN_CORE_PROC, 14, -1}, + {30, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, {31, 2, 1, 15, MAIN_CORE_PROC, 15, -1}, + {32, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {33, 2, 1, 16, MAIN_CORE_PROC, 16, -1}, + {34, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, {35, 2, 1, 17, MAIN_CORE_PROC, 17, -1}, + {36, 1, 0, 6, MAIN_CORE_PROC, 6, -1}, {37, 3, 1, 18, MAIN_CORE_PROC, 18, -1}, + {38, 1, 0, 7, MAIN_CORE_PROC, 7, -1}, {39, 3, 1, 19, MAIN_CORE_PROC, 19, -1}, + {40, 1, 0, 8, MAIN_CORE_PROC, 8, -1}, {41, 3, 1, 20, 
MAIN_CORE_PROC, 20, -1}, + {42, 1, 0, 9, MAIN_CORE_PROC, 9, -1}, {43, 3, 1, 21, MAIN_CORE_PROC, 21, -1}, + {44, 1, 0, 10, MAIN_CORE_PROC, 10, -1}, {45, 3, 1, 22, MAIN_CORE_PROC, 22, -1}, + {46, 1, 0, 11, MAIN_CORE_PROC, 11, -1}, {47, 3, 1, 23, MAIN_CORE_PROC, 23, -1}, + }, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}}, +}; +LinuxSortProcTableTestCase proc_table_2sockets_24cores_hyperthreading_2 = { + 16, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 2, 1, 12, HYPER_THREADING_PROC, 12, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 2, 1, 13, HYPER_THREADING_PROC, 13, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 2, 1, 14, HYPER_THREADING_PROC, 14, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 2, 1, 15, HYPER_THREADING_PROC, 15, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 2, 1, 16, HYPER_THREADING_PROC, 16, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 2, 1, 17, HYPER_THREADING_PROC, 17, -1}, + {12, 1, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 3, 1, 18, HYPER_THREADING_PROC, 18, -1}, + {14, 1, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 3, 1, 19, HYPER_THREADING_PROC, 19, -1}, + {16, 1, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 3, 1, 20, HYPER_THREADING_PROC, 20, -1}, + {18, 1, 0, 9, HYPER_THREADING_PROC, 9, -1}, {19, 3, 1, 21, HYPER_THREADING_PROC, 21, -1}, + {20, 1, 0, 10, HYPER_THREADING_PROC, 10, -1}, {21, 3, 1, 22, HYPER_THREADING_PROC, 22, -1}, + {22, 1, 0, 11, HYPER_THREADING_PROC, 11, -1}, {23, 3, 1, 23, HYPER_THREADING_PROC, 23, -1}, + {24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 2, 1, 12, MAIN_CORE_PROC, 12, -1}, + {26, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {27, 2, 1, 13, MAIN_CORE_PROC, 13, -1}, + {28, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {29, 2, 1, 14, MAIN_CORE_PROC, 14, -1}, + {30, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, {31, 2, 1, 15, MAIN_CORE_PROC, 
15, -1}, + {32, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {33, 2, 1, 16, MAIN_CORE_PROC, 16, -1}, + {34, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, {35, 2, 1, 17, MAIN_CORE_PROC, 17, -1}, + {36, 1, 0, 6, MAIN_CORE_PROC, 6, -1}, {37, 3, 1, 18, MAIN_CORE_PROC, 18, -1}, + {38, 1, 0, 7, MAIN_CORE_PROC, 7, -1}, {39, 3, 1, 19, MAIN_CORE_PROC, 19, -1}, + {40, 1, 0, 8, MAIN_CORE_PROC, 8, -1}, {41, 3, 1, 20, MAIN_CORE_PROC, 20, -1}, + {42, 1, 0, 9, MAIN_CORE_PROC, 9, -1}, {43, 3, 1, 21, MAIN_CORE_PROC, 21, -1}, + {44, 1, 0, 10, MAIN_CORE_PROC, 10, -1}, {45, 3, 1, 22, MAIN_CORE_PROC, 22, -1}, + {46, 1, 0, 11, MAIN_CORE_PROC, 11, -1}, {47, 3, 1, 23, MAIN_CORE_PROC, 23, -1}, + }, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}, {12, 6, 0, 6, 0, 0}}, +}; +LinuxSortProcTableTestCase proc_table_2sockets_24cores_hyperthreading_3 = { + 7, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 2, 1, 12, HYPER_THREADING_PROC, 12, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 2, 1, 13, HYPER_THREADING_PROC, 13, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 2, 1, 14, HYPER_THREADING_PROC, 14, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 2, 1, 15, HYPER_THREADING_PROC, 15, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 2, 1, 16, HYPER_THREADING_PROC, 16, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 2, 1, 17, HYPER_THREADING_PROC, 17, -1}, + {12, 1, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 3, 1, 18, HYPER_THREADING_PROC, 18, -1}, + {14, 1, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 3, 1, 19, HYPER_THREADING_PROC, 19, -1}, + {16, 1, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 3, 1, 20, HYPER_THREADING_PROC, 20, -1}, + {18, 1, 0, 9, HYPER_THREADING_PROC, 9, -1}, {19, 3, 1, 21, HYPER_THREADING_PROC, 21, -1}, + {20, 1, 0, 10, HYPER_THREADING_PROC, 10, -1}, {21, 3, 1, 22, HYPER_THREADING_PROC, 22, -1}, + {22, 1, 0, 11, 
HYPER_THREADING_PROC, 11, -1}, {23, 3, 1, 23, HYPER_THREADING_PROC, 23, -1}, + {24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 2, 1, 12, MAIN_CORE_PROC, 12, -1}, + {26, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {27, 2, 1, 13, MAIN_CORE_PROC, 13, -1}, + {28, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {29, 2, 1, 14, MAIN_CORE_PROC, 14, -1}, + {30, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, {31, 2, 1, 15, MAIN_CORE_PROC, 15, -1}, + {32, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {33, 2, 1, 16, MAIN_CORE_PROC, 16, -1}, + {34, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, {35, 2, 1, 17, MAIN_CORE_PROC, 17, -1}, + {36, 1, 0, 6, MAIN_CORE_PROC, 6, -1}, {37, 3, 1, 18, MAIN_CORE_PROC, 18, -1}, + {38, 1, 0, 7, MAIN_CORE_PROC, 7, -1}, {39, 3, 1, 19, MAIN_CORE_PROC, 19, -1}, + {40, 1, 0, 8, MAIN_CORE_PROC, 8, -1}, {41, 3, 1, 20, MAIN_CORE_PROC, 20, -1}, + {42, 1, 0, 9, MAIN_CORE_PROC, 9, -1}, {43, 3, 1, 21, MAIN_CORE_PROC, 21, -1}, + {44, 1, 0, 10, MAIN_CORE_PROC, 10, -1}, {45, 3, 1, 22, MAIN_CORE_PROC, 22, -1}, + {46, 1, 0, 11, MAIN_CORE_PROC, 11, -1}, {47, 3, 1, 23, MAIN_CORE_PROC, 23, -1}, + }, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}}, +}; +LinuxSortProcTableTestCase proc_table_2sockets_24cores_hyperthreading_4 = { + 21, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 2, 1, 12, HYPER_THREADING_PROC, 12, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 2, 1, 13, HYPER_THREADING_PROC, 13, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 2, 1, 14, HYPER_THREADING_PROC, 14, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 2, 1, 15, HYPER_THREADING_PROC, 15, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 2, 1, 16, HYPER_THREADING_PROC, 16, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 2, 1, 17, HYPER_THREADING_PROC, 17, -1}, + {12, 1, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 3, 1, 18, HYPER_THREADING_PROC, 18, -1}, + 
{14, 1, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 3, 1, 19, HYPER_THREADING_PROC, 19, -1}, + {16, 1, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 3, 1, 20, HYPER_THREADING_PROC, 20, -1}, + {18, 1, 0, 9, HYPER_THREADING_PROC, 9, -1}, {19, 3, 1, 21, HYPER_THREADING_PROC, 21, -1}, + {20, 1, 0, 10, HYPER_THREADING_PROC, 10, -1}, {21, 3, 1, 22, HYPER_THREADING_PROC, 22, -1}, + {22, 1, 0, 11, HYPER_THREADING_PROC, 11, -1}, {23, 3, 1, 23, HYPER_THREADING_PROC, 23, -1}, + {24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 2, 1, 12, MAIN_CORE_PROC, 12, -1}, + {26, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {27, 2, 1, 13, MAIN_CORE_PROC, 13, -1}, + {28, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {29, 2, 1, 14, MAIN_CORE_PROC, 14, -1}, + {30, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, {31, 2, 1, 15, MAIN_CORE_PROC, 15, -1}, + {32, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {33, 2, 1, 16, MAIN_CORE_PROC, 16, -1}, + {34, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, {35, 2, 1, 17, MAIN_CORE_PROC, 17, -1}, + {36, 1, 0, 6, MAIN_CORE_PROC, 6, -1}, {37, 3, 1, 18, MAIN_CORE_PROC, 18, -1}, + {38, 1, 0, 7, MAIN_CORE_PROC, 7, -1}, {39, 3, 1, 19, MAIN_CORE_PROC, 19, -1}, + {40, 1, 0, 8, MAIN_CORE_PROC, 8, -1}, {41, 3, 1, 20, MAIN_CORE_PROC, 20, -1}, + {42, 1, 0, 9, MAIN_CORE_PROC, 9, -1}, {43, 3, 1, 21, MAIN_CORE_PROC, 21, -1}, + {44, 1, 0, 10, MAIN_CORE_PROC, 10, -1}, {45, 3, 1, 22, MAIN_CORE_PROC, 22, -1}, + {46, 1, 0, 11, MAIN_CORE_PROC, 11, -1}, {47, 3, 1, 23, MAIN_CORE_PROC, 23, -1}, + }, + {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 3, 1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}}, +}; + +TEST_P(LinuxSortProcTableTests, LinuxProcTable) {} + +INSTANTIATE_TEST_SUITE_P(CPUMap, + LinuxSortProcTableTests, + testing::Values(proc_table_2sockets_24cores_hyperthreading_1, + proc_table_2sockets_24cores_hyperthreading_2, + proc_table_2sockets_24cores_hyperthreading_3, + proc_table_2sockets_24cores_hyperthreading_4)); +#endif +} // namespace ov diff --git a/src/plugins/auto/src/auto_schedule.cpp b/src/plugins/auto/src/auto_schedule.cpp 
index ea5f2159179824..c504e8e4457870 100644 --- a/src/plugins/auto/src/auto_schedule.cpp +++ b/src/plugins/auto/src/auto_schedule.cpp @@ -239,8 +239,7 @@ void AutoSchedule::init() { std::pair worker; std::list