diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index a36238fca4a874..9a6aba6a88971e 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -66,44 +66,23 @@ jobs: INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_TEST_DIR: $(INSTALL_DIR)\tests SETUPVARS: $(INSTALL_DIR)\setupvars.bat - PYTHON_DIR: C:\hostedtoolcache\windows\Python\3.11.2\x64 CMAKE_VERSION: 3.24.0 CMAKE_CMD: $(WORK_DIR)\cmake-$(CMAKE_VERSION)-windows-x86_64\cmake-$(CMAKE_VERSION)-windows-x86_64\bin\cmake.exe OV_CMAKE_TOOLCHAIN_FILE: $(REPO_DIR)\cmake\toolchains\mt.runtime.win32.toolchain.cmake - PYTHON_VENV_DIR: $(WORK_DIR)\.venv - + PYTHON_EXE: C:\hostedtoolcache\windows\Python\3.8.2\x64\python.exe steps: - script: | rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR) rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR) - rd /Q /S $(WORK_DIR) & mkdir C:\hostedtoolcache\windows\Python\3.11.2 - rd /Q /S $(BUILD_DIR) & mkdir C:\hostedtoolcache\windows\Python\3.11.2\x64 rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR) rd /Q /S $(BUILD_SAMPLES_TESTS_DIR) & mkdir $(BUILD_SAMPLES_TESTS_DIR) displayName: 'Make dir' - - script: curl -O https://www.python.org/ftp/python/3.11.2/python-3.11.2-amd64.exe - displayName: 'Download Python' - workingDirectory: $(WORK_DIR) - - - script: | - python-3.11.2-amd64.exe /passive InstallAllUsers=0 Include_launcher=0 TargetDir=C:\hostedtoolcache\windows\Python\3.11.2\x64 && ^ - cp C:\hostedtoolcache\windows\Python\3.8.2\x64.complete C:\hostedtoolcache\windows\Python\3.11.2\x64.complete - displayName: 'Install Python' - workingDirectory: $(WORK_DIR) - - - task: UsePythonVersion@0 - displayName: 'Use Python' - inputs: - versionSpec: '3.11.2' - disableDownloadFromRegistry: true - - script: | powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" - tree C:\hostedtoolcache\windows\Python - where python - python --version + where $(PYTHON_EXE) + $(PYTHON_EXE) --version where java java -version wmic computersystem get TotalPhysicalMemory @@ -124,20 +103,20 @@ jobs: path: openvino_contrib - script: | - python -m pip install --upgrade pip + $(PYTHON_EXE) -m pip install --upgrade pip rem For running Python API tests - python -m pip install -r $(REPO_DIR)\src\bindings\python\src\compatibility\openvino\requirements-dev.txt - python -m pip install -r $(REPO_DIR)\src\bindings\python\wheel\requirements-dev.txt - python -m pip install -r $(REPO_DIR)\src\bindings\python\requirements.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\src\bindings\python\src\compatibility\openvino\requirements-dev.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\src\bindings\python\wheel\requirements-dev.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\src\bindings\python\requirements.txt rem For running Paddle frontend unit tests - python -m pip install -r $(REPO_DIR)\src\frontends\paddle\tests\requirements.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\src\frontends\paddle\tests\requirements.txt rem For running ONNX frontend unit tests - python -m pip install -r $(REPO_DIR)\src\frontends\onnx\tests\requirements.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\src\frontends\onnx\tests\requirements.txt rem For running TensorFlow frontend unit tests - python -m pip install -r $(REPO_DIR)\src\frontends\tensorflow\tests\requirements.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\src\frontends\tensorflow\tests\requirements.txt rem For MO unit tests - python -m pip install -r $(REPO_DIR)\tools\mo\requirements.txt - python -m pip install -r $(REPO_DIR)\tools\mo\requirements_dev.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\tools\mo\requirements.txt + $(PYTHON_EXE) -m pip install -r $(REPO_DIR)\tools\mo\requirements_dev.txt rem Speed up build powershell -command "Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip -OutFile cmake-$(CMAKE_VERSION)-windows-x86_64.zip" powershell -command "Expand-Archive -Force cmake-$(CMAKE_VERSION)-windows-x86_64.zip" @@ -162,10 +141,10 @@ jobs: -DENABLE_TESTS=ON ^ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ^ -DENABLE_STRICT_DEPENDENCIES=OFF ^ + -DPython3_EXECUTABLE=$(PYTHON_EXE) ^ -DENABLE_PYTHON=ON ^ -DBUILD_nvidia_plugin=OFF ^ -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ^ - -DPython3_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.11.2\x64\python.exe" ^ -DOPENVINO_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules ^ -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" ^ -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" ^ @@ -190,7 +169,7 @@ jobs: - script: dir $(INSTALL_DIR) /s displayName: 'List install files' - - script: python -m pip install openvino-dev --find-links=$(INSTALL_DIR)\tools + - script: $(PYTHON_EXE) -m pip install openvino-dev --find-links=$(INSTALL_DIR)\tools displayName: 'Install Wheels' - script: | @@ -215,12 +194,12 @@ jobs: if not exist %USERPROFILE%\Documents\Intel\OpenVINO\openvino_c_samples_build\ exit 1 displayName: 'Build c samples' - - script: python -m pip install -r $(INSTALL_TEST_DIR)\smoke_tests\requirements.txt + - script: $(PYTHON_EXE) -m pip install -r $(INSTALL_TEST_DIR)\smoke_tests\requirements.txt displayName: 'Install dependencies for samples smoke tests' - script: | call $(SETUPVARS) && ^ - python -m pytest $(INSTALL_DIR)\tests\smoke_tests\ --env_conf $(INSTALL_TEST_DIR)\smoke_tests\env_config.yml -s --junitxml=$(INSTALL_TEST_DIR)/TEST-SamplesSmokeTests.xml + $(PYTHON_EXE) -m pytest $(INSTALL_DIR)\tests\smoke_tests\ --env_conf $(INSTALL_TEST_DIR)\smoke_tests\env_config.yml -s --junitxml=$(INSTALL_TEST_DIR)/TEST-SamplesSmokeTests.xml env: IE_APP_PATH: $(INSTALL_DIR)\samples_bin IE_APP_PYTHON_PATH: $(INSTALL_DIR)\samples\python\ diff --git a/.ci/azure/windows_conditional_compilation.yml b/.ci/azure/windows_conditional_compilation.yml index 70c7ebfff239e6..3d2df492194950 100644 --- a/.ci/azure/windows_conditional_compilation.yml +++ b/.ci/azure/windows_conditional_compilation.yml @@ -64,15 +64,6 @@ jobs: SETUPVARS: $(INSTALL_DIR)\setupvars.bat steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.11.2' - addToPath: true - architecture: 'x64' - githubToken: $(auth_token) - displayName: Setup Python 3.11 - name: setupPython - - script: | powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" where python diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index b7e688d55f8c4a..fca75e99dc4109 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -17,8 +17,8 @@ jobs: - name: Clone OpenVINO uses: actions/checkout@v4 with: - submodules: true - lfs: true + submodules: 'true' + lfs: 'true' - name: Install apt-get dependencies uses: awalsh128/cache-apt-pkgs-action@v1.3.0 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index bf602387ae8c6d..733dfed4c09d14 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -1,21 +1,25 @@ -name: Tests on Linux (Ubuntu 20.04, Python 3.11) +name: Linux (Ubuntu 20.04, Python 3.11) on: schedule: # at 00:00 on Wednesday and Saturday - cron: '0 0 * * 3,6' workflow_dispatch: pull_request: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' + paths: + - '**' + - '!**/docs/**' + - '!docs/**' + - 'docs/snippets/**' + - '!**/**.md' + - '!**.md' push: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' + paths: + - '**' + - '!docs/**' + - '!**/docs/**' + - 'docs/snippets/**' + - '!**/**.md' + - '!**.md' branches: - master - 'releases/**' @@ -50,6 +54,7 @@ jobs: OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib INSTALL_DIR: /__w/openvino/openvino/openvino_install INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install + DEVELOPER_PACKAGE_DIR: /__w/openvino/openvino/developer_package_install BUILD_DIR: /__w/openvino/openvino/openvino_build CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp @@ -74,6 +79,7 @@ jobs: repository: 'openvinotoolkit/openvino_contrib' path: ${{ env.OPENVINO_CONTRIB_REPO }} submodules: 'true' + ref: 'master' # # Dependencies @@ -150,6 +156,7 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${DEVELOPER_PACKAGE_DIR} -DCOMPONENT=developer_package -P ${BUILD_DIR}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake - name: Pack Artifacts @@ -164,6 +171,10 @@ jobs: tar -czvf ${BUILD_DIR}/openvino_package.tar.gz * popd + pushd ${DEVELOPER_PACKAGE_DIR} + tar -czvf ${BUILD_DIR}/openvino_developer_package.tar.gz * + popd + pushd ${INSTALL_TEST_DIR} tar -czvf ${BUILD_DIR}/openvino_tests.tar.gz * popd @@ -203,6 +214,14 @@ jobs: path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' + - name: Upload openvino developer package + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: openvino_developer_package + path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz + if-no-files-found: 'error' + - name: Upload openvino debian packages if: ${{ always() }} uses: actions/upload-artifact@v3 @@ -493,7 +512,6 @@ jobs: ONNX_RUNTIME_BUILD_DIR: /__w/openvino/openvino/onnxruntime/build steps: - - name: Fetch install_build_dependencies.sh uses: actions/checkout@v4 with: @@ -501,6 +519,7 @@ jobs: install_build_dependencies.sh sparse-checkout-cone-mode: false path: ${{ env.OPENVINO_REPO }} + ref: 'master' - name: Install git run: | @@ -932,6 +951,16 @@ jobs: TEST_DEVICE: CPU TEST_PRECISION: FP16 + - name: ONNX Layer Tests + run: | + python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt + # requires 'unit_tests' from 'tools/mo' + export PYTHONPATH=${OPENVINO_REPO}/tools/mo/:$PYTHONPATH + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + - name: TensorFlow 1 Layer Tests - TF FE run: | python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt @@ -1111,13 +1140,14 @@ jobs: with: name: test-results-functional-cpu path: | - ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + ${{ env.INSTALL_TEST_DIR }}/temp/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/*.log ${{ env.INSTALL_TEST_DIR }}/logs/failed/*.log ${{ env.INSTALL_TEST_DIR }}/logs/crashed/*.log ${{ env.INSTALL_TEST_DIR }}/logs/hanged/*.log ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log - ${{ env.INSTALL_TEST_DIR }}/logs/disabled_tests.log ${{ env.INSTALL_TEST_DIR }}/logs/hash_table.csv + ${{ env.PARALLEL_TEST_CACHE }} if-no-files-found: 'error' TensorFlow_Hub_Models_Tests: @@ -1292,3 +1322,122 @@ jobs: path: | ${{ env.INSTALL_TEST_DIR }}/TEST*.html if-no-files-found: 'error' + + NVIDIA_Plugin: + name: NVIDIA plugin + needs: Build + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores + container: + image: openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04 + volumes: + - /mount/caches:/mount/caches + env: + CMAKE_BUILD_TYPE: 'Release' + CMAKE_GENERATOR: 'Ninja Multi-Config' + CMAKE_CUDA_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: ccache + CMAKE_C_COMPILER_LAUNCHER: ccache + INSTALL_DIR: /__w/openvino/openvino/install + OPENVINO_DEVELOPER_PACKAGE: /__w/openvino/openvino/install/developer_package + OPENVINO_REPO: /__w/openvino/openvino/openvino + OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib + NVIDIA_BUILD_DIR: /__w/openvino/openvino/nvidia_plugin_build + DEBIAN_FRONTEND: 'noninteractive' + CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release + CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp + CCACHE_MAXSIZE: 50G + + steps: + - name: Install Prerequisites + run: apt update && apt install -y git ca-certificates + + - name: Download OpenVINO package + uses: actions/download-artifact@v3 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO Developer package + uses: actions/download-artifact@v3 + with: + name: openvino_developer_package + path: ${{ env.INSTALL_DIR }} + + - name: Extract OpenVINO packages + run: | + pushd ${INSTALL_DIR} + tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} + popd + + pushd ${INSTALL_DIR} + tar -xzf openvino_developer_package.tar.gz -C ${INSTALL_DIR} + popd + + # TODO: replace with sparse checkout below + - name: Clone OpenVINO + uses: actions/checkout@v4 + with: + path: ${{ env.OPENVINO_REPO }} + + - name: Fetch install_build_dependencies.sh + if: ${{ 'false' }} + uses: actions/checkout@v4 + with: + sparse-checkout: | + install_build_dependencies.sh + sparse-checkout-cone-mode: false + path: ${{ env.OPENVINO_REPO }} + + - name: Clone OpenVINO Contrib + uses: actions/checkout@v4 + with: + repository: 'openvinotoolkit/openvino_contrib' + path: ${{ env.OPENVINO_CONTRIB_REPO }} + ref: 'master' + + # + # Dependencies + # + + - name: Install build dependencies + run: | + ${OPENVINO_REPO}/install_build_dependencies.sh + apt -y --no-install-recommends install software-properties-common + + - name: Install CUDA + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 + + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub + add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + apt update + apt install -y \ + libcudnn8=8.9.4.*-1+cuda11.8 \ + libcudnn8-dev=8.9.4.*-1+cuda11.8 \ + libcudnn8-samples=8.9.4.*-1+cuda11.8 \ + cuda-runtime-11-8 \ + cuda-11-8 \ + libcutensor1=1.6.1.5-1 \ + libcutensor-dev=1.6.1.5-1 \ + cuda-drivers=520.61.05-1 + + # + # Build + # + + - name: Cmake & Build - NVIDIA Plugin + run: | + source ${INSTALL_DIR}/setupvars.sh + cmake \ + -DOpenVINODeveloperPackage_DIR=${OPENVINO_DEVELOPER_PACKAGE}/cmake \ + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -S ${OPENVINO_CONTRIB_REPO}/modules/nvidia_plugin \ + -B ${NVIDIA_BUILD_DIR} + cmake --build ${NVIDIA_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests + + - name: Show ccache stats + run: ccache --show-stats diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index b8567d57a6a6cd..c8fb34cca85244 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -73,6 +73,7 @@ jobs: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} lfs: 'true' + ref: 'master' # # Dependencies @@ -110,6 +111,9 @@ jobs: # For running Paddle frontend unit tests python3 -m pip install -r ${OPENVINO_REPO}/src/frontends/paddle/tests/requirements.txt + # see https://github.com/PaddlePaddle/Paddle/issues/55597#issuecomment-1718131420 + wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + apt-get install ./libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb # # Build @@ -230,6 +234,7 @@ jobs: repository: 'openvinotoolkit/testdata' path: ${{ env.MODELS_PATH }} lfs: 'true' + ref: 'master' - name: Download selective build statistics package uses: actions/download-artifact@v3 diff --git a/.github/workflows/linux_cuda.yml b/.github/workflows/linux_cuda.yml deleted file mode 100644 index 7fd90dac00dcf4..00000000000000 --- a/.github/workflows/linux_cuda.yml +++ /dev/null @@ -1,143 +0,0 @@ -name: Linux NVIDIA Plugin (Ubuntu 20.04) -on: - workflow_dispatch: - pull_request: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' - - '**/layer_tests_summary/**' - - '**/conformance/**' - push: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' - - '**/layer_tests_summary/**' - - '**/conformance/**' - branches: - - master - -concurrency: - # github.ref is not unique in post-commit - group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-linux-nvidia - cancel-in-progress: true - -jobs: - Build: - defaults: - run: - shell: bash - runs-on: aks-linux-16-cores - container: - image: openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04 - volumes: - - /mount/caches:/mount/caches - env: - CMAKE_BUILD_TYPE: 'Release' - CMAKE_GENERATOR: 'Ninja Multi-Config' - CMAKE_CUDA_COMPILER_LAUNCHER: ccache - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache - OPENVINO_REPO: /__w/openvino/openvino/openvino - OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib - OV_BUILD_DIR: /__w/openvino/openvino/openvino_build - NVIDIA_BUILD_DIR: /__w/openvino/openvino/nvidia_plugin_build - DEBIAN_FRONTEND: 'noninteractive' - CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 50G - steps: - - - name: Install Prerequisites - run: | - apt update - apt install -y git curl git git-lfs unzip wget - - - name: Clone OpenVINO - uses: actions/checkout@v4 - with: - path: ${{ env.OPENVINO_REPO }} - submodules: 'true' - - - name: Clone OpenVINO Contrib - uses: actions/checkout@v4 - with: - repository: 'openvinotoolkit/openvino_contrib' - path: ${{ env.OPENVINO_CONTRIB_REPO }} - ref: 'master' - - # - # Dependencies - # - - - name: Install build dependencies - run: | - ${OPENVINO_REPO}/install_build_dependencies.sh - - apt -y --no-install-recommends install unzip wget software-properties-common - - - name: Install CUDA - run: | - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 - - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - apt update - apt install -y \ - libcudnn8=8.9.4.*-1+cuda11.8 \ - libcudnn8-dev=8.9.4.*-1+cuda11.8 \ - libcudnn8-samples=8.9.4.*-1+cuda11.8 \ - cuda-runtime-11-8 \ - cuda-11-8 \ - libcutensor1=1.6.1.5-1 \ - libcutensor-dev=1.6.1.5-1 \ - cuda-drivers=520.61.05-1 - - # - # Build - # - - - name: CMake configure - run: | - cmake \ - -G "${{ env.CMAKE_GENERATOR }}" \ - -DENABLE_CPPLINT=OFF \ - -DENABLE_NCC_STYLE=OFF \ - -DENABLE_SYSTEM_PUGIXML=ON \ - -DENABLE_SYSTEM_OPENCL=ON \ - -DENABLE_STRICT_DEPENDENCIES=OFF \ - -DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }} \ - -DENABLE_INTEL_CPU=OFF \ - -DENABLE_INTEL_GPU=OFF \ - -DENABLE_INTEL_GNA=OFF \ - -DENABLE_OV_TF_FRONTEND=OFF \ - -DENABLE_OV_TF_LITE_FRONTEND=OFF \ - -DENABLE_OV_PADDLE_FRONTEND=OFF \ - -DENABLE_OV_PYTORCH_FRONTEND=OFF \ - -DENABLE_OV_ONNX_FRONTEND=OFF \ - -DENABLE_PYTHON=OFF \ - -DENABLE_TESTS=ON \ - -DCPACK_GENERATOR=TGZ \ - -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ - -S ${OPENVINO_REPO} \ - -B ${OV_BUILD_DIR} - - - name: Build - OpenVINO - run: | - cmake --build ${OV_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose --target ov_dev_targets - - - name: Cmake & Build - NVIDIA Plugin - run: | - cmake \ - -DOpenVINODeveloperPackage_DIR=${OV_BUILD_DIR} \ - -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ - -S ${OPENVINO_CONTRIB_REPO}/modules/nvidia_plugin \ - -B ${NVIDIA_BUILD_DIR} - cmake --build ${NVIDIA_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests - - - name: Show ccache stats - run: ccache --show-stats diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index f1318923b3e0c7..d5084d7a5d19c6 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -360,7 +360,7 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowLiteFrontend.xml - - name: Transformations Tests + - name: Transformations func tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 \ @@ -372,6 +372,12 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_util_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-commonUtilsTests.xml + - name: Snippets func tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_snippets_func_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SnippetsFuncTests.xml + - name: CPU plugin unit tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh @@ -384,7 +390,13 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_auto_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_unit_tests.xml - - name: Template plugin tests + - name: AUTO func Tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 \ + --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml + + - name: Template plugin func tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_template_func_tests --gtest_print_time=1 \ @@ -403,11 +415,31 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_capi_test --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OpenVINOCAPITests.xml + - name: AutoBatch unit tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_unit_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_unit_tests.xml + - name: AutoBatch func tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_func_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_func_tests.xml + - name: Proxy Plugin func tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVProxyTests.xml + + - name: Hetero unit tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroUnitTests.xml + + - name: Hetero func tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml + - name: Upload Test Results uses: actions/upload-artifact@v3 if: ${{ always() }} @@ -485,7 +517,7 @@ jobs: python3 -m pip install $ov_dev_wheel_name[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch] popd - - name: nGraph and IE Python Bindings Tests + - name: Python API 1.0 Tests run: | python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/pyngraph \ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml \ @@ -501,9 +533,7 @@ jobs: python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino \ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml \ - --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py \ - --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_onnx/test_zoo_models.py \ - --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_onnx/test_backend.py + --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py - name: MO Python API Tests run: | @@ -517,10 +547,22 @@ jobs: TEST_DEVICE: CPU TEST_PRECISION: FP16 + - name: OVC Python API Tests + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + # TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' + source ${{ env.INSTALL_DIR }}/setupvars.sh + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + - name: Model Optimizer unit tests run: | export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}:$PYTHONPATH python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/mo/unit_tests \ + --ignore=${{ env.INSTALL_TEST_DIR }}/mo/unit_tests/mo/front/mxnet \ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-ModelOptimizer.xml - name: PyTorch Layer Tests @@ -533,6 +575,17 @@ jobs: TEST_DEVICE: CPU TEST_PRECISION: FP16 + - name: ONNX Layer Tests + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + - name: TensorFlow 1 Layer Tests - TF FE run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt @@ -568,6 +621,7 @@ jobs: --ir_version=11 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_Activation.xml -k "sigmoid" env: TEST_DEVICE: CPU + TEST_PRECISION: FP16 - name: TensorFlow Lite Layer Tests - TFL FE run: | @@ -577,6 +631,14 @@ jobs: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_lite_tests/ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tfl_fe.xml env: TEST_DEVICE: CPU + TEST_PRECISION: FP16 + + - name: Python ONNX operators tests + run: | + # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately + python3 -m pytest -sv ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k 'not cuda' \ + --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx_frontend.xml \ + --ignore=${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/test_python/test_zoo_models.py - name: Python Frontend tests run: | @@ -587,6 +649,10 @@ jobs: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml + # TODO: install to 'tests' component via cpack + - name: OVC unit tests + run: python3 -m pytest -s ${{ env.OPENVINO_REPO }}/tools/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml + - name: Upload Test Results uses: actions/upload-artifact@v3 if: ${{ always() }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 493a1e47ba6e0a..6ce891e6767698 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -1,24 +1,27 @@ name: Windows (VS 2022, Python 3.11) on: - workflow_dispatch: - pull_request: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' - - '**/layer_tests_summary/**' - - '**/conformance/**' - push: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' - - '**/layer_tests_summary/**' - - '**/conformance/**' - branches: - - master + schedule: + # at 00:00 on workdays + - cron: '0 0 * * 1,2,3,4,5' +# workflow_dispatch: +# pull_request: +# paths-ignore: +# - '**/docs/**' +# - 'docs/**' +# - '**/**.md' +# - '**.md' +# - '**/layer_tests_summary/**' +# - '**/conformance/**' +# push: +# paths-ignore: +# - '**/docs/**' +# - 'docs/**' +# - '**/**.md' +# - '**.md' +# - '**/layer_tests_summary/**' +# - '**/conformance/**' +# branches: +# - master concurrency: # github.ref is not unique in post-commit @@ -56,6 +59,7 @@ jobs: with: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' + ref: 'master' # # Dependencies @@ -300,7 +304,7 @@ jobs: # Find and install the core OV wheel $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" - + # Find and install the dev OV wheel $ovDevWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino_dev*.whl | % { $_.FullName } python3 -m pip install "$ovDevWheelPath[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch]" @@ -308,21 +312,18 @@ jobs: - name: Python API 1.0 Tests shell: cmd run: | - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/pyngraph ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyngraph/tests_compatibility/test_onnx/test_zoo_models.py --ignore=${{ env.INSTALL_TEST_DIR }}/pyngraph/tests_compatibility/test_onnx/test_backend.py + python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/pyngraph ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyngraph/tests_compatibility/test_onnx/test_zoo_models.py - name: Python API 2.0 Tests shell: cmd run: | - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py + set PYTHONPATH=${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% + python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py - name: Model Optimizer UT shell: cmd run: | - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};${{ env.INSTALL_TEST_DIR }};${{ env.INSTALL_DIR }}\python\python${{ env.PYTHON_VERSION }};%PYTHONPATH% - - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/mo/unit_tests --ignore=${{ env.INSTALL_TEST_DIR }}/mo/unit_tests/mo/front/mxnet --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-ModelOptimizer.xml + python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/mo/unit_tests --ignore=${{ env.INSTALL_TEST_DIR }}/mo/unit_tests/mo/front/mxnet --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-ModelOptimizer.xml # Ticket - 115085 - name: PyTorch Layer Tests @@ -330,32 +331,43 @@ jobs: shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + env: + TEST_DEVICE: CPU - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% + - name: ONNX Layer Tests + shell: cmd + run: | + python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml + :: requires 'unit_tests' from 'tools/mo' + set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml env: TEST_DEVICE: CPU + TEST_PRECISION: FP16 - name: TensorFlow 1 Layer Tests - TF FE shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + :: requires 'unit_tests' from 'tools/mo' set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_fe.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_fe.xml env: TEST_DEVICE: CPU + TEST_PRECISION: FP16 - name: TensorFlow 2 Layer Tests - TF FE shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + :: requires 'unit_tests' from 'tools/mo' set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_fe.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_fe.xml env: TEST_DEVICE: CPU @@ -363,42 +375,30 @@ jobs: shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_Roll.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_Roll.xml - name: TensorFlow 2 Layer Tests - Legacy FE shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_Activation.xml -k "sigmoid" + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_Activation.xml -k "sigmoid" env: TEST_DEVICE: CPU + TEST_PRECISION: FP16 - name: TensorFlow Lite Layer Tests - TFL FE shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_lite_tests/ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tfl_fe.xml + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_lite_tests/ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tfl_fe.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 - - name: TensorFlow Lite Layer Tests - TFL FE + - name: Python ONNX operators tests shell: cmd run: | :: Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately - python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - python3 -m pytest ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k "not cuda" ^ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx_frontend.xml ^ --ignore=${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/test_python/test_zoo_models.py @@ -408,8 +408,7 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - + :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino' call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml env: TEST_DEVICE: CPU @@ -420,8 +419,7 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - + :: TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml env: TEST_DEVICE: CPU @@ -432,10 +430,13 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml + # TODO: install to 'tests' component via cpack + - name: OVC unit tests + shell: cmd + run: python3 -m pytest -s ${{ env.OPENVINO_REPO }}/tools/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml + - name: Upload Test Results uses: actions/upload-artifact@v3 if: ${{ !cancelled() }} @@ -518,7 +519,7 @@ jobs: run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ONNXFrontend.xml - - name: TensorFlow Common tests + - name: TensorFlow Common frontend tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_common_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowCommonFrontend.xml @@ -533,7 +534,7 @@ jobs: run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowLiteFrontend.xml - - name: Transformations Tests + - name: Transformations func tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml @@ -568,17 +569,17 @@ jobs: run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_gna_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-GNAUnitTests.xml - - name: AUTO UT + - name: AUTO unit tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_auto_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_unit_tests.xml - - name: AUTO FuncTests + - name: AUTO func Tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml - - name: Template plugin tests + - name: Template plugin func tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_template_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateFuncTests.xml @@ -598,12 +599,12 @@ jobs: run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_unit_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_unit_tests.xml - - name: AutoBatch FuncTests + - name: AutoBatch func tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_auto_batch_func_tests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_batch_func_tests.xml - - name: Proxy Plugin Tests + - name: Proxy Plugin func tests shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVProxyTests.xml @@ -697,9 +698,11 @@ jobs: name: test-results-functional-cpu path: | ${{ env.INSTALL_TEST_DIR }}/temp/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/*.log ${{ env.INSTALL_TEST_DIR }}/logs/failed/*.log ${{ env.INSTALL_TEST_DIR }}/logs/crashed/*.log ${{ env.INSTALL_TEST_DIR }}/logs/hanged/*.log ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log - ${{ env.INSTALL_TEST_DIR }}/logs/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/hash_table.csv + ${{ env.PARALLEL_TEST_CACHE }} if-no-files-found: 'error' diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index fabd763e1a3f2d..93f947ee071df1 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -67,6 +67,7 @@ jobs: repository: 'openvinotoolkit/testdata' path: 'testdata' lfs: 'true' + ref: 'master' # # Dependencies diff --git a/CMakeLists.txt b/CMakeLists.txt index ea3de7994f722e..82277e5c875cfb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,54 +81,55 @@ file(REMOVE "${CMAKE_BINARY_DIR}/ngraphTargets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") -# remove exported developer targets to force its regeneration -macro(ov_clean_dev_targets) - foreach(component IN LISTS openvino_export_components) - file(REMOVE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake") - file(REMOVE "${CMAKE_BINARY_DIR}/ov_${component}_dev_targets.cmake") - unset(${component} CACHE) - endforeach() - unset(openvino_export_components CACHE) +# remove exported developer targets files to force its regeneration +macro(ov_clean_developer_package_targets) + file(REMOVE "${CMAKE_BINARY_DIR}/inference_engine_developer_package_targets.cmake") + file(REMOVE "${CMAKE_BINARY_DIR}/openvino_developer_package_targets.cmake") + unset(_OPENVINO_DEVELOPER_PACKAGE_TARGETS CACHE) unset(openvino_installed_targets CACHE) endmacro() -ov_clean_dev_targets() +ov_clean_developer_package_targets() -# -# Build -# +function(ov_developer_package_export_targets) + cmake_parse_arguments(EXPORT "" "TARGET;INSTALL_DESTIONATION" "INSTALL_INCLUDE_DIRECTORIES" ${ARGN}) -function(openvino_developer_export_targets) - cmake_parse_arguments(EXPORT "" "COMPONENT" "TARGETS" ${ARGN}) + # to allow exporting of aliased targets with the original names + if(TARGET "${EXPORT_TARGET}") + get_target_property(original_name ${EXPORT_TARGET} ALIASED_TARGET) + if(TARGET "${original_name}") + # replace target with its original name + set(EXPORT_TARGET ${original_name}) + endif() + list(APPEND _OPENVINO_DEVELOPER_PACKAGE_TARGETS ${EXPORT_TARGET}) - if(EXPORT_UNPARSED_ARGUMENTS) - message(FATAL_ERROR "openvino_developer_export_targets has unparsed arguments: ${EXPORT_UNPARSED_ARGUMENTS}") - endif() + if(EXPORT_INSTALL_INCLUDE_DIRECTORIES) + if(NOT EXPORT_INSTALL_DESTIONATION) + set(EXPORT_INSTALL_DESTIONATION "developer_package/include/${EXPORT_TARGET}") + endif() - set(${EXPORT_COMPONENT} "${${EXPORT_COMPONENT}};${EXPORT_TARGETS}") + target_include_directories(${EXPORT_TARGET} INTERFACE "$") - # to allow exporting of aliased targets with the original names - foreach(target_name IN LISTS ${EXPORT_COMPONENT}) - if(TARGET "${target_name}") - get_target_property(original_name ${target_name} ALIASED_TARGET) - if(TARGET "${original_name}") - list(REMOVE_ITEM ${EXPORT_COMPONENT} ${target_name}) - list(APPEND ${EXPORT_COMPONENT} ${original_name}) - endif() + foreach(install_dir IN LISTS EXPORT_INSTALL_INCLUDE_DIRECTORIES) + install(DIRECTORY "${install_dir}" + DESTINATION "${EXPORT_INSTALL_DESTIONATION}" + COMPONENT developer_package EXCLUDE_FROM_ALL) + endforeach() endif() - endforeach() - - list(REMOVE_DUPLICATES ${EXPORT_COMPONENT}) - set(${EXPORT_COMPONENT} "${${EXPORT_COMPONENT}}" CACHE INTERNAL - "A list of OpenVINO ${EXPORT_COMPONENT} exported targets" FORCE) + else() + message(FATAL_ERROR "Internal error: ${target_name} does not represent a cmake target") + endif() - list(APPEND openvino_export_components ${EXPORT_COMPONENT}) - list(REMOVE_DUPLICATES openvino_export_components) - set(openvino_export_components "${openvino_export_components}" CACHE INTERNAL - "A list of OpenVINO exported components" FORCE) + list(REMOVE_DUPLICATES _OPENVINO_DEVELOPER_PACKAGE_TARGETS) + set(_OPENVINO_DEVELOPER_PACKAGE_TARGETS "${_OPENVINO_DEVELOPER_PACKAGE_TARGETS}" CACHE INTERNAL + "A list of OpenVINO Developer Package exported targets" FORCE) endfunction() -# add target with processed tests model zoo +# +# Build +# + if(ENABLE_TESTS) + # add target with processed tests model zoo include(cmake/test_model_zoo.cmake) endif() diff --git a/README.md b/README.md index adc6f9f2b965ea..bfc4a722c2680d 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ OpenVINO™ is an open-source toolkit for optimizing and deploying AI inference. - Reduce resource demands and efficiently deploy on a range of Intel® platforms from edge to cloud -This open-source version includes several components: namely [Model Optimizer], [OpenVINO™ Runtime], [Post-Training Optimization Tool], as well as CPU, GPU, GNA, multi device and heterogeneous plugins to accelerate deep learning inference on Intel® CPUs and Intel® Processor Graphics. +This open-source version includes several components: namely [OpenVINO Model Converter (OVC)], [OpenVINO™ Runtime], as well as CPU, GPU, GNA, multi device and heterogeneous plugins to accelerate deep learning inference on Intel® CPUs and Intel® Processor Graphics. It supports pre-trained models from [Open Model Zoo], along with 100+ open source and public models in popular formats such as TensorFlow, ONNX, PaddlePaddle, MXNet, Caffe, Kaldi. @@ -48,8 +48,7 @@ source and public models in popular formats such as TensorFlow, ONNX, PaddlePadd * [python](./src/bindings/python) - Python API for OpenVINO™ Runtime * [Plugins](./src/plugins) - contains OpenVINO plugins which are maintained in open-source by the OpenVINO team. For more information, take a look at the [list of supported devices](#supported-hardware-matrix). * [Frontends](./src/frontends) - contains available OpenVINO frontends that allow reading models from the native framework format. -* [Model Optimizer] - is a cross-platform command-line tool that facilitates the transition between training and deployment environments, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices. -* [Post-Training Optimization Tool] - is designed to accelerate the inference of deep learning models by applying special methods without model retraining or fine-tuning, for example, post-training 8-bit quantization. +* [OpenVINO Model Converter (OVC)] - is a cross-platform command-line tool that facilitates the transition between training and deployment environments, and adjusts deep learning models for optimal execution on end-point target devices. * [Samples] - applications in C, C++ and Python languages that show basic OpenVINO use cases. ## Supported Hardware matrix @@ -62,7 +61,7 @@ The OpenVINO™ Runtime can infer models on different hardware devices. This sec Device Plugin Library - ShortDescription + Short Description @@ -70,7 +69,7 @@ The OpenVINO™ Runtime can infer models on different hardware devices. This sec CPU Intel CPU openvino_intel_cpu_plugin - Intel Xeon with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel Core Processors with Intel AVX2, Intel Atom Processors with Intel® Streaming SIMD Extensions (Intel® SSE) + Intel Xeon with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel Core Processors with Intel AVX2, Intel Atom Processors with Intel® Streaming SIMD Extensions (Intel® SSE), Intel® Advanced Matrix Extensions (Intel® AMX) ARM CPU @@ -98,7 +97,7 @@ OpenVINO™ Toolkit also contains several plugins which simplify loading models Plugin Library - ShortDescription + Short Description @@ -196,6 +195,5 @@ Report questions, issues and suggestions, using: [Open Model Zoo]:https://github.com/openvinotoolkit/open_model_zoo [OpenVINO™ Runtime]:https://docs.openvino.ai/2023.1/openvino_docs_OV_UG_OV_Runtime_User_Guide.html -[Model Optimizer]:https://docs.openvino.ai/2023.1/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html -[Post-Training Optimization Tool]:https://docs.openvino.ai/2023.1/pot_introduction.html +[OpenVINO Model Converter (OVC)]:https://docs.openvino.ai/2023.1/openvino_docs_model_processing_introduction.html#convert-a-model-in-cli-ovc [Samples]:https://github.com/openvinotoolkit/openvino/tree/master/samples diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 1d4210f300b058..5c86bdea57620c 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -5,7 +5,7 @@ cmake_policy(SET CMP0054 NEW) # TODO: fix it, outside of source dir MO cannot find TBB dependency -set_temp_directory(TEMP "${CMAKE_SOURCE_DIR}") +ov_set_temp_directory(TEMP "${CMAKE_SOURCE_DIR}") ## Intel OMP package if(THREADING STREQUAL "OMP") @@ -71,12 +71,16 @@ function(ov_download_tbb) if(NOT DEFINED ENV{TBBROOT} AND (DEFINED ENV{TBB_DIR} OR DEFINED TBB_DIR)) if(DEFINED ENV{TBB_DIR}) - set(TEMP_ROOT $ENV{TBB_DIR}) - elseif (DEFINED TBB_DIR) - set(TEMP_ROOT ${TBB_DIR}) + set(TBB_DIR "$ENV{TBB_DIR}") endif() + set(TEMP_ROOT "${TBB_DIR}") while(NOT EXISTS "${TEMP_ROOT}/include") - get_filename_component(TEMP_ROOT ${TEMP_ROOT} PATH) + get_filename_component(TEMP_ROOT_PARENT ${TEMP_ROOT} PATH) + if(TEMP_ROOT_PARENT STREQUAL TEMP_ROOT) + # to prevent recursion + message(FATAL_ERROR "${TBB_DIR} does not contain 'include' folder. Please, unset TBB_DIR") + endif() + set(TEMP_ROOT "${TEMP_ROOT_PARENT}") endwhile() set(TBBROOT ${TEMP_ROOT}) endif() diff --git a/cmake/developer_package/OpenVINODeveloperScriptsConfig.cmake b/cmake/developer_package/OpenVINODeveloperScriptsConfig.cmake index 3996f373156d89..bc512b9b229b02 100644 --- a/cmake/developer_package/OpenVINODeveloperScriptsConfig.cmake +++ b/cmake/developer_package/OpenVINODeveloperScriptsConfig.cmake @@ -27,11 +27,12 @@ endmacro() set(OLD_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH "${OpenVINODeveloperScripts_DIR}") -function(set_ci_build_number) - set(repo_root "${CMAKE_SOURCE_DIR}") +function(ov_set_ci_build_number) include(version) - foreach(var CI_BUILD_NUMBER OpenVINO_VERSION OpenVINO_SOVERSION OpenVINO_VERSION_SUFFIX OpenVINO_VERSION_BUILD - OpenVINO_VERSION_MAJOR OpenVINO_VERSION_MINOR OpenVINO_VERSION_PATCH) + ov_parse_ci_build_number("${CMAKE_SOURCE_DIR}") + + foreach(var CI_BUILD_NUMBER OpenVINO_VERSION OpenVINO_SOVERSION OpenVINO_VERSION_SUFFIX + OpenVINO_VERSION_MAJOR OpenVINO_VERSION_MINOR OpenVINO_VERSION_PATCH OpenVINO_VERSION_BUILD) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} version component is not defined") endif() @@ -44,7 +45,7 @@ ov_set_if_not_defined(Python3_FIND_STRATEGY LOCATION) include(features) -set_ci_build_number() +ov_set_ci_build_number() # # Detect target @@ -139,10 +140,13 @@ endif() # allow to override default OUTPUT_ROOT root if(NOT DEFINED OUTPUT_ROOT) - if(NOT DEFINED OpenVINO_SOURCE_DIR) - message(FATAL_ERROR "OpenVINO_SOURCE_DIR is not defined") + if(DEFINED OpenVINO_SOURCE_DIR) + # For BW compatiblity, when extra modules are built separately + # but still write its artifacts to OpenVINO source directory + set(OUTPUT_ROOT ${OpenVINO_SOURCE_DIR}) + else() + set(OUTPUT_ROOT ${CMAKE_SOURCE_DIR}) endif() - set(OUTPUT_ROOT ${OpenVINO_SOURCE_DIR}) endif() # Enable postfixes for Debug/Release builds diff --git a/cmake/developer_package/add_target_helpers.cmake b/cmake/developer_package/add_target_helpers.cmake index c52b393d7bbe74..92f4afbc23bbbe 100644 --- a/cmake/developer_package/add_target_helpers.cmake +++ b/cmake/developer_package/add_target_helpers.cmake @@ -9,7 +9,6 @@ ov_add_target( NAME core_lib ADD_CPPLINT ADD_CLANG_FORMAT - DEVELOPER_PACKAGE TYPE ROOT ${CMAKE_CURRENT_SOURCE_DIR} ADDITIONAL_SOURCE_DIRS @@ -44,9 +43,6 @@ function(ov_add_target) NAME # name of target ROOT # root directory to be used for recursive search of source files ) - set(oneValueOptionalArgs - DEVELOPER_PACKAGE # Enables exporting of the target through the developer package - ) set(multiValueArgs INCLUDES # Extra include directories LINK_LIBRARIES # Link libraries (in form of target name or file name) @@ -58,7 +54,7 @@ function(ov_add_target) LINK_LIBRARIES_WHOLE_ARCHIVE # list of static libraries to link, each object file should be used and not discarded LINK_FLAGS # list of extra commands to linker ) - cmake_parse_arguments(ARG "${options}" "${oneValueRequiredArgs};${oneValueOptionalArgs}" "${multiValueArgs}" ${ARGN} ) + cmake_parse_arguments(ARG "${options}" "${oneValueRequiredArgs}" "${multiValueArgs}" ${ARGN} ) # sanity checks foreach(argName IN LISTS oneValueRequiredArgs) @@ -128,11 +124,6 @@ function(ov_add_target) # code style ov_add_clang_format_target(${ARG_NAME}_clang FOR_TARGETS ${ARG_NAME}) endif() - if (ARG_DEVELOPER_PACKAGE) - # developer package - openvino_developer_export_targets(COMPONENT ${ARG_DEVELOPER_PACKAGE} - TARGETS ${ARG_NAME}) - endif() if(WIN32) # Provide default compile pdb name equal to target name set_target_properties(${ARG_NAME} PROPERTIES COMPILE_PDB_NAME ${ARG_NAME}) @@ -181,7 +172,9 @@ function(ov_add_test_target) else() add_test(NAME ${ARG_NAME} COMMAND ${ARG_NAME}) endif() - set_property(TEST ${ARG_NAME} PROPERTY LABELS ${ARG_LABELS}) + if(ARG_LABELS) + set_property(TEST ${ARG_NAME} PROPERTY LABELS ${ARG_LABELS}) + endif() install(TARGETS ${ARG_NAME} RUNTIME DESTINATION tests diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index 7d98b40c3ce81d..c0c878e0183eb0 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -329,15 +329,15 @@ endif() file(RELATIVE_PATH OV_RELATIVE_BIN_PATH ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_SOURCE_DIR}) -if(${CMAKE_VERSION} VERSION_LESS "3.20") - file(TO_NATIVE_PATH ${OpenVINO_SOURCE_DIR} OV_NATIVE_PROJECT_ROOT_DIR) +if(CMAKE_VERSION VERSION_LESS 3.20) + file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} OV_NATIVE_PROJECT_ROOT_DIR) file(TO_NATIVE_PATH ${OV_RELATIVE_BIN_PATH} NATIVE_OV_RELATIVE_BIN_PATH) else() - cmake_path(NATIVE_PATH OpenVINO_SOURCE_DIR OV_NATIVE_PROJECT_ROOT_DIR) + cmake_path(NATIVE_PATH CMAKE_SOURCE_DIR OV_NATIVE_PROJECT_ROOT_DIR) cmake_path(NATIVE_PATH OV_RELATIVE_BIN_PATH NATIVE_OV_RELATIVE_BIN_PATH) endif() -file(RELATIVE_PATH OV_NATIVE_PARENT_PROJECT_ROOT_DIR "${OpenVINO_SOURCE_DIR}/.." ${OpenVINO_SOURCE_DIR}) +file(RELATIVE_PATH OV_NATIVE_PARENT_PROJECT_ROOT_DIR "${CMAKE_SOURCE_DIR}/.." ${CMAKE_SOURCE_DIR}) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # @@ -392,7 +392,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") if(NOT DEFINED ENV{GITHUB_ACTIONS}) add_compile_options( "$<$:/d1trimfile:${OV_NATIVE_PROJECT_ROOT_DIR}\\>" - "$<$:/d1trimfile:${OpenVINO_SOURCE_DIR}/>") + "$<$:/d1trimfile:${CMAKE_SOURCE_DIR}/>") endif() # diff --git a/cmake/developer_package/options.cmake b/cmake/developer_package/options.cmake index 75b9c886894631..4506d85a027f92 100644 --- a/cmake/developer_package/options.cmake +++ b/cmake/developer_package/options.cmake @@ -43,7 +43,7 @@ macro(ov_option_enum variable description value) endmacro() function (ov_print_enabled_features) - if(NOT COMMAND set_ci_build_number) + if(NOT COMMAND ov_set_ci_build_number) message(FATAL_ERROR "CI_BUILD_NUMBER is not set yet") endif() diff --git a/cmake/developer_package/packaging/nsis.cmake b/cmake/developer_package/packaging/nsis.cmake index 901e34f97820bb..4174037af74f39 100644 --- a/cmake/developer_package/packaging/nsis.cmake +++ b/cmake/developer_package/packaging/nsis.cmake @@ -5,6 +5,8 @@ macro(ov_nsis_specific_settings) # installation directory set(CPACK_PACKAGE_INSTALL_DIRECTORY "Intel") + # License to be embedded in the installer + set(CPACK_RESOURCE_FILE_LICENSE "${OpenVINO_SOURCE_DIR}/LICENSE") # TODO: provide icons # set(CPACK_NSIS_MUI_ICON "") diff --git a/cmake/developer_package/packaging/packaging.cmake b/cmake/developer_package/packaging/packaging.cmake index 505565f55da5d7..2279580040f736 100644 --- a/cmake/developer_package/packaging/packaging.cmake +++ b/cmake/developer_package/packaging/packaging.cmake @@ -193,8 +193,6 @@ macro(ov_cpack) set(CPACK_PACKAGE_CONTACT "OpenVINO Developers ") set(CPACK_VERBATIM_VARIABLES ON) set(CPACK_COMPONENTS_ALL ${ARGN}) - # TODO: set proper license file for Windows installer - set(CPACK_RESOURCE_FILE_LICENSE "${OpenVINO_SOURCE_DIR}/LICENSE") # default permissions for directories creation set(CMAKE_INSTALL_DEFAULT_DIRECTORY_PERMISSIONS diff --git a/cmake/developer_package/version.cmake b/cmake/developer_package/version.cmake index 0353e3a52a8617..1b71befe448b76 100644 --- a/cmake/developer_package/version.cmake +++ b/cmake/developer_package/version.cmake @@ -4,50 +4,51 @@ find_package(Git QUIET) -function (branchName VAR) - if(NOT DEFINED repo_root) - message(FATAL_ERROR "repo_root is not defined") - endif() +function(ov_branch_name VAR REPO_ROOT) if(GIT_FOUND) execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${repo_root} + WORKING_DIRECTORY ${REPO_ROOT} OUTPUT_VARIABLE GIT_BRANCH OUTPUT_STRIP_TRAILING_WHITESPACE) set (${VAR} ${GIT_BRANCH} PARENT_SCOPE) endif() endfunction() -function (commitHash VAR) - if(NOT DEFINED repo_root) - message(FATAL_ERROR "repo_root is not defined") - endif() +function(ov_commit_hash VAR REPO_ROOT) if(GIT_FOUND) execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse --short=11 HEAD - WORKING_DIRECTORY ${repo_root} + WORKING_DIRECTORY ${REPO_ROOT} OUTPUT_VARIABLE GIT_COMMIT_HASH OUTPUT_STRIP_TRAILING_WHITESPACE) set (${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) endif() endfunction() -function (commitNumber VAR) - if(NOT DEFINED repo_root) - message(FATAL_ERROR "repo_root is not defined") - endif() +function(ov_commit_number VAR REPO_ROOT) if(GIT_FOUND) execute_process( COMMAND ${GIT_EXECUTABLE} rev-list --count --first-parent HEAD - WORKING_DIRECTORY ${repo_root} + WORKING_DIRECTORY ${REPO_ROOT} OUTPUT_VARIABLE GIT_COMMIT_NUMBER OUTPUT_STRIP_TRAILING_WHITESPACE) set (${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE) + else() + # set zeros since git is not available + set (${VAR} "000" PARENT_SCOPE) endif() endfunction() -macro(ov_parse_ci_build_number) - set(OpenVINO_VERSION_BUILD 000) +macro(ov_parse_ci_build_number repo_root) + # provides OpenVINO version + # 1. If CI_BUILD_NUMBER is defined, parses this information + # 2. Otherwise, either: + # - parses openvino/core/version.hpp + # - takes from OpenVINOConfig-version.cmake in case of relocatable Developer package + if (DEFINED ENV{CI_BUILD_NUMBER}) + set(CI_BUILD_NUMBER $ENV{CI_BUILD_NUMBER}) + endif() if(CI_BUILD_NUMBER MATCHES "^([0-9]+)\.([0-9]+)\.([0-9]+)\-([0-9]+)\-.*") set(OpenVINO_VERSION_MAJOR ${CMAKE_MATCH_1}) @@ -63,12 +64,9 @@ macro(ov_parse_ci_build_number) message(FATAL_ERROR "Failed to parse CI_BUILD_NUMBER which is ${CI_BUILD_NUMBER}") endif() - if(NOT DEFINED repo_root) - message(FATAL_ERROR "repo_root is not defined") - endif() - - macro(ov_get_hpp_version) + function(ov_compare_version_with_headers) if(NOT DEFINED OpenVINO_SOURCE_DIR) + # if we are not in OpenVINO source tree, let's ignore this comparison return() endif() @@ -101,30 +99,29 @@ macro(ov_parse_ci_build_number) endif() endforeach() - # detect commit number - commitNumber(OpenVINO_VERSION_BUILD_HPP) - if(OpenVINO_VERSION_BUILD STREQUAL "000" AND DEFINED OpenVINO_VERSION_BUILD_HPP) - set(OpenVINO_VERSION_BUILD "${OpenVINO_VERSION_BUILD_HPP}") - else() - set(OpenVINO_VERSION_BUILD_HPP "${OpenVINO_VERSION_BUILD}") - endif() - - set(ov_hpp_version_is_found ON) - endmacro() - - # detect OpenVINO version via openvino/core/version.hpp and ie_version.hpp - ov_get_hpp_version() - - if(ov_hpp_version_is_found) - foreach(var OpenVINO_VERSION_MAJOR OpenVINO_VERSION_MINOR OpenVINO_VERSION_PATCH OpenVINO_VERSION_BUILD) + foreach(var OpenVINO_VERSION_MAJOR OpenVINO_VERSION_MINOR OpenVINO_VERSION_PATCH) if(DEFINED ${var} AND NOT ${var} EQUAL ${var}_HPP) message(FATAL_ERROR "${var} parsed from CI_BUILD_NUMBER (${${var}}) \ and from openvino/core/version.hpp (${${var}_HPP}) are different") else() # CI_BUILD_NUMBER is not defined well, take info from openvino/core/version.hpp as a baseline - set(${var} ${${var}_HPP}) + set(${var} ${${var}_HPP} PARENT_SCOPE) endif() endforeach() + endfunction() + + # detect OpenVINO version via openvino/core/version.hpp and ie_version.hpp + ov_compare_version_with_headers() + + # detect commit number + ov_commit_number(OpenVINO_VERSION_BUILD_FROM_GIT "${repo_root}") + + if(OpenVINO_VERSION_BUILD AND NOT OpenVINO_VERSION_BUILD STREQUAL OpenVINO_VERSION_BUILD_FROM_GIT) + # TODO: replace with FATAL_ERROR once NPU version will be discussed + message(WARNING "OpenVINO_VERSION_BUILD parsed from CI_BUILD_NUMBER (${OpenVINO_VERSION_BUILD}) \ + and determined by git (${OpenVINO_VERSION_BUILD_FROM_GIT}) are different") + else() + set(OpenVINO_VERSION_BUILD "${OpenVINO_VERSION_BUILD_FROM_GIT}") endif() set(OpenVINO_SOVERSION "${OpenVINO_VERSION_MAJOR}${OpenVINO_VERSION_MINOR}${OpenVINO_VERSION_PATCH}") @@ -140,8 +137,8 @@ macro(ov_parse_ci_build_number) if(NOT the_whole_version_is_defined_by_ci) # create CI_BUILD_NUMBER - branchName(GIT_BRANCH) - commitHash(GIT_COMMIT_HASH) + ov_branch_name(GIT_BRANCH "${repo_root}") + ov_commit_hash(GIT_COMMIT_HASH "${repo_root}") if(NOT GIT_BRANCH STREQUAL "master") set(GIT_BRANCH_POSTFIX "-${GIT_BRANCH}") @@ -157,14 +154,6 @@ macro(ov_parse_ci_build_number) endif() endmacro() -# provides OpenVINO version -# 1. If CI_BUILD_NUMBER is defined, parses this information -# 2. Otherwise, parses openvino/core/version.hpp -if (DEFINED ENV{CI_BUILD_NUMBER}) - set(CI_BUILD_NUMBER $ENV{CI_BUILD_NUMBER}) -endif() -ov_parse_ci_build_number() - macro (addVersionDefines FILE) message(WARNING "'addVersionDefines' is deprecated. Please, use 'ov_add_version_defines'") diff --git a/cmake/extra_modules.cmake b/cmake/extra_modules.cmake index afc1dc335b3e55..6c392fcc6eed12 100644 --- a/cmake/extra_modules.cmake +++ b/cmake/extra_modules.cmake @@ -9,13 +9,10 @@ function(ie_generate_dev_package_config) set(OpenCV_FOUND OFF) endif() - foreach(component IN LISTS openvino_export_components) - # export all targets with prefix and use them during extra modules build - export(TARGETS ${${component}} NAMESPACE IE:: - APPEND FILE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake") - list(APPEND all_dev_targets ${${component}}) - endforeach() - add_custom_target(ie_dev_targets DEPENDS ${all_dev_targets}) + # export all targets with prefix and use them during extra modules build + export(TARGETS ${_OPENVINO_DEVELOPER_PACKAGE_TARGETS} NAMESPACE IE:: + APPEND FILE "${CMAKE_BINARY_DIR}/inference_engine_developer_package_targets.cmake") + add_custom_target(ie_dev_targets DEPENDS ${_OPENVINO_DEVELOPER_PACKAGE_TARGETS}) set(PATH_VARS "OpenVINO_SOURCE_DIR") if(ENABLE_SAMPLES OR ENABLE_TESTS) @@ -44,20 +41,20 @@ function(ov_generate_dev_package_config) set(OpenCV_FOUND OFF) endif() - foreach(component IN LISTS openvino_export_components) - # filter out targets which are installed by OpenVINOConfig.cmake static build case - set(exported_targets) - foreach(target IN LISTS ${component}) - if(NOT target IN_LIST openvino_installed_targets) - list(APPEND exported_targets ${target}) - endif() - endforeach() - # export all developer targets with prefix and use them during extra modules build - export(TARGETS ${exported_targets} NAMESPACE openvino:: - APPEND FILE "${CMAKE_BINARY_DIR}/ov_${component}_dev_targets.cmake") - list(APPEND all_dev_targets ${${component}}) - endforeach() - add_custom_target(ov_dev_targets DEPENDS ${all_dev_targets}) + # create a helper target to build all developer package targets + add_custom_target(ov_dev_targets DEPENDS ${_OPENVINO_DEVELOPER_PACKAGE_TARGETS}) + + # filter out targets which are installed by OpenVINOConfig.cmake static build case + if(openvino_installed_targets) + list(REMOVE_ITEM _OPENVINO_DEVELOPER_PACKAGE_TARGETS ${openvino_installed_targets}) + endif() + # export all developer targets with prefix and use them during extra modules build + export(TARGETS ${_OPENVINO_DEVELOPER_PACKAGE_TARGETS} NAMESPACE openvino:: + APPEND FILE "${CMAKE_BINARY_DIR}/openvino_developer_package_targets.cmake") + + # + # OpenVINODeveloperPackageConfig.cmake for build tree + # set(PATH_VARS "OpenVINO_SOURCE_DIR") if(ENABLE_SAMPLES OR ENABLE_TESTS) @@ -77,38 +74,91 @@ function(ov_generate_dev_package_config) configure_file("${OpenVINO_SOURCE_DIR}/cmake/templates/OpenVINOConfig-version.cmake.in" "${CMAKE_BINARY_DIR}/OpenVINODeveloperPackageConfig-version.cmake" @ONLY) + + # + # OpenVINODeveloperPackageConfig.cmake for installation tree + # + + set(DEV_PACKAGE_ROOT_DIR developer_package) + set(DEV_PACKAGE_CMAKE_DIR ${DEV_PACKAGE_ROOT_DIR}/cmake) + set(DEVELOPER_PACKAGE_COMPONENT developer_package) + set(DEVELOPER_PACKAGE_EXPORT_SET OpenVINODeveloperTargets) + + # create and install main developer package config files + configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in" + "${OpenVINO_BINARY_DIR}/share/OpenVINODeveloperPackageConfig.cmake" + INSTALL_DESTINATION ${DEV_PACKAGE_CMAKE_DIR} + NO_CHECK_REQUIRED_COMPONENTS_MACRO) + + configure_file("${OpenVINO_SOURCE_DIR}/cmake/templates/OpenVINOConfig-version.cmake.in" + "${OpenVINO_BINARY_DIR}/share/OpenVINODeveloperPackageConfig-version.cmake" + @ONLY) + + install(FILES "${OpenVINO_BINARY_DIR}/share/OpenVINODeveloperPackageConfig.cmake" + "${OpenVINO_BINARY_DIR}/share/OpenVINODeveloperPackageConfig-version.cmake" + DESTINATION ${DEV_PACKAGE_CMAKE_DIR} + COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} + EXCLUDE_FROM_ALL) + + # Install whole 'cmake/developer_package' folder + install(DIRECTORY "${OpenVINODeveloperScripts_DIR}/" + DESTINATION "${DEV_PACKAGE_CMAKE_DIR}" + COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} + EXCLUDE_FROM_ALL) + + # Install CMakeLists.txt to read cache variables from + install(FILES "${OpenVINO_BINARY_DIR}/CMakeCache.txt" + DESTINATION ${DEV_PACKAGE_CMAKE_DIR} + COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} + EXCLUDE_FROM_ALL) + + # install developer package targets + install(TARGETS ${_OPENVINO_DEVELOPER_PACKAGE_TARGETS} EXPORT ${DEVELOPER_PACKAGE_EXPORT_SET} + RUNTIME DESTINATION ${DEV_PACKAGE_ROOT_DIR}/bin COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} EXCLUDE_FROM_ALL + ARCHIVE DESTINATION ${DEV_PACKAGE_ROOT_DIR}/lib COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} EXCLUDE_FROM_ALL + LIBRARY DESTINATION ${DEV_PACKAGE_ROOT_DIR}/lib COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} EXCLUDE_FROM_ALL) + + install(EXPORT ${DEVELOPER_PACKAGE_EXPORT_SET} + FILE OpenVINODeveloperPackageTargets.cmake + NAMESPACE openvino:: + DESTINATION ${DEV_PACKAGE_ROOT_DIR}/cmake + COMPONENT ${DEVELOPER_PACKAGE_COMPONENT} + EXCLUDE_FROM_ALL) + + # Note: that OpenCV and gflags are explicitly not installed to simplify relocatable + # OpenVINO Developer package maintainance. OpenVINO_SOURCE_DIR is also unvailable, because + # relocatable developer package can be used on a different machine where OpenVINO repo is not available endfunction() # # Add extra modules # -function(register_extra_modules) +function(_ov_register_extra_modules) set(InferenceEngineDeveloperPackage_DIR "${CMAKE_CURRENT_BINARY_DIR}/build-modules") set(OpenVINODeveloperPackage_DIR "${CMAKE_BINARY_DIR}/build-modules") set(OpenVINO_DIR "${CMAKE_BINARY_DIR}") - function(generate_fake_dev_package NS) + function(_ov_generate_fake_developer_package NS) if(NS STREQUAL "openvino") set(devconfig_file "${OpenVINODeveloperPackage_DIR}/OpenVINODeveloperPackageConfig.cmake") else() set(devconfig_file "${InferenceEngineDeveloperPackage_DIR}/InferenceEngineDeveloperPackageConfig.cmake") endif() - file(REMOVE "${devconfig_file}") + file(REMOVE "${devconfig_file}") file(WRITE "${devconfig_file}" "\# !! AUTOGENERATED: DON'T EDIT !!\n\n") - foreach(targets_list IN LISTS ${openvino_export_components}) - foreach(target IN LISTS targets_list) - file(APPEND "${devconfig_file}" "if(NOT TARGET ${NS}::${target}) - add_library(${NS}::${target} ALIAS ${target}) + foreach(exported_target IN LISTS _OPENVINO_DEVELOPER_PACKAGE_TARGETS) + file(APPEND "${devconfig_file}" "if(NOT TARGET ${NS}::${exported_target}) + add_library(${NS}::${exported_target} ALIAS ${exported_target}) endif()\n") - endforeach() endforeach() endfunction() - generate_fake_dev_package("openvino") - generate_fake_dev_package("IE") + _ov_generate_fake_developer_package("openvino") + # TODO: remove with API 1.0 removal + _ov_generate_fake_developer_package("IE") # detect where OPENVINO_EXTRA_MODULES contains folders with CMakeLists.txt # other folders are supposed to have sub-folders with CMakeLists.txt @@ -155,21 +205,18 @@ endfunction() # Extra modules support # -# this InferenceEngineDeveloperPackageConfig.cmake is not used -# during extra modules build since it's generated after modules -# are configured +# this OpenVINODeveloperPackageConfig.cmake is not used during extra modules build +# since it's generated after modules are configured ie_generate_dev_package_config() ov_generate_dev_package_config() # extra modules must be registered after inference_engine library # and all other OpenVINO Core libraries are creared -# because 'register_extra_modules' creates fake InferenceEngineDeveloperPackageConfig.cmake +# because '_ov_register_extra_modules' creates fake InferenceEngineDeveloperPackageConfig.cmake # with all imported developer targets -register_extra_modules() +_ov_register_extra_modules() -# for static libraries case we need to generate final ov_plugins.hpp -# with all the information about plugins +# we need to generate final ov_plugins.hpp with all the information about plugins ov_generate_plugins_hpp() - -# used for static build +# we need to generate final ov_frontends.hpp with all the information about frontends ov_generate_frontends_hpp() diff --git a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in index e197597487b61c..a98b4207e285d2 100644 --- a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in +++ b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in @@ -88,9 +88,7 @@ endif() _ov_find_tbb() -foreach(component @openvino_export_components@) - include("${CMAKE_CURRENT_LIST_DIR}/${component}_dev_targets.cmake") -endforeach() +include("${CMAKE_CURRENT_LIST_DIR}/inference_engine_developer_package_targets.cmake") if(TARGET IE::ov_core_dev AND NOT TARGET openvino::core::dev) add_library(openvino::core::dev INTERFACE IMPORTED) diff --git a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in index f78e31ce635d81..3620bcd091dab5 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in @@ -77,13 +77,7 @@ find_dependency(OpenVINO _ov_find_tbb() _ov_find_pugixml() -foreach(component @openvino_export_components@) - # TODO: remove legacy targets from some tests - # string(FIND "${component}" "_legacy" index) - # if (index EQUAL -1) - include("${CMAKE_CURRENT_LIST_DIR}/ov_${component}_dev_targets.cmake") - # endif() -endforeach() +include("${CMAKE_CURRENT_LIST_DIR}/openvino_developer_package_targets.cmake") # inherit OpenCV from main OpenVINO project if enabled if("@OpenCV_FOUND@") diff --git a/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in new file mode 100644 index 00000000000000..a4cdb93d387c58 --- /dev/null +++ b/cmake/templates/OpenVINODeveloperPackageConfigRelocatable.cmake.in @@ -0,0 +1,74 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +# Variables to export in plugin's projects + +set(ov_options "@OV_OPTIONS@") +list(APPEND ov_options CPACK_GENERATOR) + +if(APPLE) + list(APPEND ov_options CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET) +endif() + +get_property(_OV_GENERATOR_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(_OV_GENERATOR_MULTI_CONFIG) + list(APPEND ov_options CMAKE_CONFIGURATION_TYPES) + if(CMAKE_GENERATOR MATCHES "^Ninja Multi-Config$") + list(APPEND ov_options CMAKE_DEFAULT_BUILD_TYPE) + endif() +else() + list(APPEND ov_options CMAKE_BUILD_TYPE) +endif() +unset(_OV_GENERATOR_MULTI_CONFIG) + +file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path) + +message(STATUS "The following CMake options are exported from OpenVINO Developer package") +message(" ") +foreach(option IN LISTS ov_options) + if(NOT DEFINED "${option}") + load_cache("${cache_path}" READ_WITH_PREFIX "" ${option}) + endif() + message(" ${option}: ${${option}}") +endforeach() +message(" ") + +# Restore TBB installation directory (requires for proper LC_RPATH on macOS with SIP) +load_cache("${cache_path}" READ_WITH_PREFIX "" TBB_INSTALL_DIR) + +# activate generation of plugins.xml +set(ENABLE_PLUGINS_XML ON) + +# Disable warning as error for private components +set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) + +# +# Content +# + +# OpenVINO_DIR is supposed to be set as an environment variable +find_dependency(OpenVINO) + +find_dependency(OpenVINODeveloperScripts + PATHS "${CMAKE_CURRENT_LIST_DIR}" + NO_CMAKE_FIND_ROOT_PATH + NO_DEFAULT_PATH) + +_ov_find_tbb() +_ov_find_pugixml() + +include("${CMAKE_CURRENT_LIST_DIR}/OpenVINODeveloperPackageTargets.cmake") +# +# Extra Compile Flags +# + +# don't fail on strict compilation options in 3rd party modules +ov_dev_package_no_errors() + +# Don't threat deprecated API warnings as errors in 3rd party apps +ov_deprecated_no_errors() diff --git a/docs/OV_Runtime_UG/auto_device_selection.md b/docs/OV_Runtime_UG/auto_device_selection.md index 6b2385683494da..234197a6488b3e 100644 --- a/docs/OV_Runtime_UG/auto_device_selection.md +++ b/docs/OV_Runtime_UG/auto_device_selection.md @@ -56,11 +56,10 @@ The logic behind the choice is as follows: To put it simply, when loading the model to the first device on the list fails, AUTO will try to load it to the next device in line, until one of them succeeds. What is important, **AUTO starts inference with the CPU of the system by default**, as it provides very low latency and can start inference with no additional delays. While the CPU is performing inference, AUTO continues to load the model to the device best suited for the purpose and transfers the task to it when ready. -This way, the devices which are much slower in compiling models, GPU being the best example, do not impede inference at its initial stages. +This way, the devices which are much slower in compiling models, GPU being the best example, do not impact inference at its initial stages. For example, if you use a CPU and a GPU, the first-inference latency of AUTO will be better than that of using GPU alone. -Note that if you choose to exclude CPU from the priority list or disable the initial CPU acceleration feature via ``ov::intel_auto::enable_startup_fallback``, it will be unable to support the initial model compilation stage. - +Note that if you choose to exclude CPU from the priority list or disable the initial CPU acceleration feature via ``ov::intel_auto::enable_startup_fallback``, it will be unable to support the initial model compilation stage. The models with dynamic input/output or stateful :doc:`stateful` operations will be loaded to the CPU if it is in the candidate list. Otherwise, these models will follow the normal flow and be loaded to the device based on priority. .. image:: _static/images/autoplugin_accelerate.svg @@ -91,7 +90,7 @@ Following the OpenVINO™ naming convention, the Automatic Device Selection mode +----------------------------------------------+--------------------------------------------------------------------+ -| Property | Values and Description | +| Property(C++ version) | Values and Description | +==============================================+====================================================================+ | | **Values**: | | | | @@ -170,6 +169,25 @@ Following the OpenVINO™ naming convention, the Automatic Device Selection mode Inference with AUTO is configured similarly to when device plugins are used: you compile the model on the plugin with configuration and execute inference. +The code samples on this page assume following import(Python)/using (C++) are included at the beginning of code snippets. + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/snippets/ov_auto.py + :language: python + :fragment: [py_ov_property_import_header] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: docs/snippets/AUTO0.cpp + :language: cpp + :fragment: [py_ov_property_import_header] + + Device Candidates and Priority ++++++++++++++++++++++++++++++ @@ -303,7 +321,7 @@ If device priority is specified when using CUMULATIVE_THROUGHPUT, AUTO will run .. code-block:: sh - compiled_model = core.compile_model(model, "AUTO:GPU,CPU", {"PERFORMANCE_HINT" : {"CUMULATIVE_THROUGHPUT"}}) + compiled_model = core.compile_model(model, "AUTO:GPU,CPU", {hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT}) .. tab-item:: C++ :sync: cpp @@ -322,7 +340,7 @@ If AUTO is used without specifying any device names, and if there are multiple G .. code-block:: sh - compiled_model = core.compile_model(model, "AUTO:GPU.1,GPU.0", {"PERFORMANCE_HINT" : {"CUMULATIVE_THROUGHPUT"}) + compiled_model = core.compile_model(model, "AUTO:GPU.1,GPU.0", {hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT}) .. tab-item:: C++ :sync: cpp diff --git a/docs/_static/selector-tool/assets/selector-56fddec6.js b/docs/_static/selector-tool/assets/selector-114afa0d.js similarity index 51% rename from docs/_static/selector-tool/assets/selector-56fddec6.js rename to docs/_static/selector-tool/assets/selector-114afa0d.js index 3437a6b4b55dbc..2878b10357074f 100644 --- a/docs/_static/selector-tool/assets/selector-56fddec6.js +++ b/docs/_static/selector-tool/assets/selector-114afa0d.js @@ -1,4 +1,4 @@ -var of=Object.defineProperty;var sf=(e,t,n)=>t in e?of(e,t,{enumerable:!0,configurable:!0,writable:!0,value:n}):e[t]=n;var De=(e,t,n)=>(sf(e,typeof t!="symbol"?t+"":t,n),n);function lf(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var Ar={},af={get exports(){return Ar},set exports(e){Ar=e}},Ti={},D={},uf={get exports(){return D},set exports(e){D=e}},j={};/** +var af=Object.defineProperty;var uf=(e,t,n)=>t in e?af(e,t,{enumerable:!0,configurable:!0,writable:!0,value:n}):e[t]=n;var je=(e,t,n)=>(uf(e,typeof t!="symbol"?t+"":t,n),n);function cf(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var Ar={},df={get exports(){return Ar},set exports(e){Ar=e}},Ti={},j={},pf={get exports(){return j},set exports(e){j=e}},D={};/** * @license React * react.production.min.js * @@ -6,7 +6,7 @@ var of=Object.defineProperty;var sf=(e,t,n)=>t in e?of(e,t,{enumerable:!0,config * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. - */var ro=Symbol.for("react.element"),cf=Symbol.for("react.portal"),df=Symbol.for("react.fragment"),pf=Symbol.for("react.strict_mode"),ff=Symbol.for("react.profiler"),hf=Symbol.for("react.provider"),mf=Symbol.for("react.context"),gf=Symbol.for("react.forward_ref"),vf=Symbol.for("react.suspense"),yf=Symbol.for("react.memo"),_f=Symbol.for("react.lazy"),Aa=Symbol.iterator;function kf(e){return e===null||typeof e!="object"?null:(e=Aa&&e[Aa]||e["@@iterator"],typeof e=="function"?e:null)}var Nc={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},Ec=Object.assign,xc={};function ur(e,t,n){this.props=e,this.context=t,this.refs=xc,this.updater=n||Nc}ur.prototype.isReactComponent={};ur.prototype.setState=function(e,t){if(typeof e!="object"&&typeof e!="function"&&e!=null)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,e,t,"setState")};ur.prototype.forceUpdate=function(e){this.updater.enqueueForceUpdate(this,e,"forceUpdate")};function Pc(){}Pc.prototype=ur.prototype;function Vl(e,t,n){this.props=e,this.context=t,this.refs=xc,this.updater=n||Nc}var Fl=Vl.prototype=new Pc;Fl.constructor=Vl;Ec(Fl,ur.prototype);Fl.isPureReactComponent=!0;var za=Array.isArray,Cc=Object.prototype.hasOwnProperty,Dl={current:null},Rc={key:!0,ref:!0,__self:!0,__source:!0};function Tc(e,t,n){var r,o={},i=null,s=null;if(t!=null)for(r in t.ref!==void 0&&(s=t.ref),t.key!==void 0&&(i=""+t.key),t)Cc.call(t,r)&&!Rc.hasOwnProperty(r)&&(o[r]=t[r]);var l=arguments.length-2;if(l===1)o.children=n;else if(1t in e?of(e,t,{enumerable:!0,config * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. - */var xf=D,Pf=Symbol.for("react.element"),Cf=Symbol.for("react.fragment"),Rf=Object.prototype.hasOwnProperty,Tf=xf.__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED.ReactCurrentOwner,Lf={key:!0,ref:!0,__self:!0,__source:!0};function Lc(e,t,n){var r,o={},i=null,s=null;n!==void 0&&(i=""+n),t.key!==void 0&&(i=""+t.key),t.ref!==void 0&&(s=t.ref);for(r in t)Rf.call(t,r)&&!Lf.hasOwnProperty(r)&&(o[r]=t[r]);if(e&&e.defaultProps)for(r in t=e.defaultProps,t)o[r]===void 0&&(o[r]=t[r]);return{$$typeof:Pf,type:e,key:i,ref:s,props:o,_owner:Tf.current}}Ti.Fragment=Cf;Ti.jsx=Lc;Ti.jsxs=Lc;(function(e){e.exports=Ti})(af);const ni=Ar.Fragment,_=Ar.jsx,I=Ar.jsxs;document.body.style.cssText+=` + */var Rf=j,Tf=Symbol.for("react.element"),bf=Symbol.for("react.fragment"),Lf=Object.prototype.hasOwnProperty,If=Rf.__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED.ReactCurrentOwner,Vf={key:!0,ref:!0,__self:!0,__source:!0};function jc(e,t,n){var r,o={},i=null,s=null;n!==void 0&&(i=""+n),t.key!==void 0&&(i=""+t.key),t.ref!==void 0&&(s=t.ref);for(r in t)Lf.call(t,r)&&!Vf.hasOwnProperty(r)&&(o[r]=t[r]);if(e&&e.defaultProps)for(r in t=e.defaultProps,t)o[r]===void 0&&(o[r]=t[r]);return{$$typeof:Tf,type:e,key:i,ref:s,props:o,_owner:If.current}}Ti.Fragment=bf;Ti.jsx=jc;Ti.jsxs=jc;(function(e){e.exports=Ti})(df);const ni=Ar.Fragment,_=Ar.jsx,L=Ar.jsxs;document.body.style.cssText+=` overflow: hidden; -`;const If=()=>{const e={type:"size",height:document.body.offsetHeight};window.parent.postMessage(e)};new ResizeObserver(If).observe(document.body);function fe(e){return fe=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(t){return typeof t}:function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},fe(e)}function ct(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function bf(e,t){if(fe(e)!=="object"||e===null)return e;var n=e[Symbol.toPrimitive];if(n!==void 0){var r=n.call(e,t||"default");if(fe(r)!=="object")return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return(t==="string"?String:Number)(e)}function Ic(e){var t=bf(e,"string");return fe(t)==="symbol"?t:String(t)}function $a(e,t){for(var n=0;ne.length)&&(t=e.length);for(var n=0,r=new Array(t);n1&&arguments[1]!==void 0?arguments[1]:{};ct(this,e),this.init(t,n)}return dt(e,[{key:"init",value:function(n){var r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};this.prefix=r.prefix||"i18next:",this.logger=n||Df,this.options=r,this.debug=r.debug}},{key:"setDebug",value:function(n){this.debug=n}},{key:"log",value:function(){for(var n=arguments.length,r=new Array(n),o=0;o1?r-1:0),i=1;i-1?l.replace(/###/g,"."):l}function o(){return!e||typeof e=="string"}for(var i=typeof t!="string"?[].concat(t):t.split(".");i.length>1;){if(o())return{};var s=r(i.shift());!e[s]&&n&&(e[s]=new n),Object.prototype.hasOwnProperty.call(e,s)?e=e[s]:e={}}return o()?{}:{obj:e,k:r(i.shift())}}function Ya(e,t,n){var r=Ul(e,t,Object),o=r.obj,i=r.k;o[i]=n}function Af(e,t,n,r){var o=Ul(e,t,Object),i=o.obj,s=o.k;i[s]=i[s]||[],r&&(i[s]=i[s].concat(n)),r||i[s].push(n)}function ri(e,t){var n=Ul(e,t),r=n.obj,o=n.k;if(r)return r[o]}function Ga(e,t,n){var r=ri(e,n);return r!==void 0?r:ri(t,n)}function Dc(e,t,n){for(var r in t)r!=="__proto__"&&r!=="constructor"&&(r in e?typeof e[r]=="string"||e[r]instanceof String||typeof t[r]=="string"||t[r]instanceof String?n&&(e[r]=t[r]):Dc(e[r],t[r],n):e[r]=t[r]);return e}function bn(e){return e.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g,"\\$&")}var zf={"&":"&","<":"<",">":">",'"':""","'":"'","/":"/"};function Mf(e){return typeof e=="string"?e.replace(/[&<>"'\/]/g,function(t){return zf[t]}):e}var Ii=typeof window<"u"&&window.navigator&&typeof window.navigator.userAgentData>"u"&&window.navigator.userAgent&&window.navigator.userAgent.indexOf("MSIE")>-1,$f=[" ",",","?","!",";"];function Bf(e,t,n){t=t||"",n=n||"";var r=$f.filter(function(l){return t.indexOf(l)<0&&n.indexOf(l)<0});if(r.length===0)return!0;var o=new RegExp("(".concat(r.map(function(l){return l==="?"?"\\?":l}).join("|"),")")),i=!o.test(e);if(!i){var s=e.indexOf(n);s>0&&!o.test(e.substring(0,s))&&(i=!0)}return i}function Qa(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),n.push.apply(n,r)}return n}function mo(e){for(var t=1;t"u"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch{return!1}}function jc(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:".";if(e){if(e[t])return e[t];for(var r=t.split(n),o=e,i=0;ii+s;)s++,l=r.slice(i,i+s).join(n),a=o[l];if(a===void 0)return;if(a===null)return null;if(t.endsWith(l)){if(typeof a=="string")return a;if(l&&typeof a[l]=="string")return a[l]}var u=r.slice(i+s).join(n);return u?jc(a,u,n):void 0}o=o[r[i]]}return o}}var Wf=function(e){Li(n,e);var t=Kf(n);function n(r){var o,i=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{ns:["translation"],defaultNS:"translation"};return ct(this,n),o=t.call(this),Ii&&tn.call(Wt(o)),o.data=r||{},o.options=i,o.options.keySeparator===void 0&&(o.options.keySeparator="."),o.options.ignoreJSONStructure===void 0&&(o.options.ignoreJSONStructure=!0),o}return dt(n,[{key:"addNamespaces",value:function(o){this.options.ns.indexOf(o)<0&&this.options.ns.push(o)}},{key:"removeNamespaces",value:function(o){var i=this.options.ns.indexOf(o);i>-1&&this.options.ns.splice(i,1)}},{key:"getResource",value:function(o,i,s){var l=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{},a=l.keySeparator!==void 0?l.keySeparator:this.options.keySeparator,u=l.ignoreJSONStructure!==void 0?l.ignoreJSONStructure:this.options.ignoreJSONStructure,f=[o,i];s&&typeof s!="string"&&(f=f.concat(s)),s&&typeof s=="string"&&(f=f.concat(a?s.split(a):s)),o.indexOf(".")>-1&&(f=o.split("."));var d=ri(this.data,f);return d||!u||typeof s!="string"?d:jc(this.data&&this.data[o]&&this.data[o][i],s,a)}},{key:"addResource",value:function(o,i,s,l){var a=arguments.length>4&&arguments[4]!==void 0?arguments[4]:{silent:!1},u=this.options.keySeparator;u===void 0&&(u=".");var f=[o,i];s&&(f=f.concat(u?s.split(u):s)),o.indexOf(".")>-1&&(f=o.split("."),l=i,i=f[1]),this.addNamespaces(i),Ya(this.data,f,l),a.silent||this.emit("added",o,i,s,l)}},{key:"addResources",value:function(o,i,s){var l=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{silent:!1};for(var a in s)(typeof s[a]=="string"||Object.prototype.toString.apply(s[a])==="[object Array]")&&this.addResource(o,i,a,s[a],{silent:!0});l.silent||this.emit("added",o,i,s)}},{key:"addResourceBundle",value:function(o,i,s,l,a){var u=arguments.length>5&&arguments[5]!==void 0?arguments[5]:{silent:!1},f=[o,i];o.indexOf(".")>-1&&(f=o.split("."),l=s,s=i,i=f[1]),this.addNamespaces(i);var d=ri(this.data,f)||{};l?Dc(d,s,a):d=mo(mo({},d),s),Ya(this.data,f,d),u.silent||this.emit("added",o,i,s)}},{key:"removeResourceBundle",value:function(o,i){this.hasResourceBundle(o,i)&&delete this.data[o][i],this.removeNamespaces(i),this.emit("removed",o,i)}},{key:"hasResourceBundle",value:function(o,i){return this.getResource(o,i)!==void 0}},{key:"getResourceBundle",value:function(o,i){return i||(i=this.options.defaultNS),this.options.compatibilityAPI==="v1"?mo(mo({},{}),this.getResource(o,i)):this.getResource(o,i)}},{key:"getDataByLanguage",value:function(o){return this.data[o]}},{key:"hasLanguageSomeTranslations",value:function(o){var i=this.getDataByLanguage(o),s=i&&Object.keys(i)||[];return!!s.find(function(l){return i[l]&&Object.keys(i[l]).length>0})}},{key:"toJSON",value:function(){return this.data}}]),n}(tn),Uc={processors:{},addPostProcessor:function(t){this.processors[t.name]=t},handle:function(t,n,r,o,i){var s=this;return t.forEach(function(l){s.processors[l]&&(n=s.processors[l].process(n,r,o,i))}),n}};function qa(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),n.push.apply(n,r)}return n}function we(e){for(var t=1;t"u"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch{return!1}}var Xa={},Ja=function(e){Li(n,e);var t=Yf(n);function n(r){var o,i=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};return ct(this,n),o=t.call(this),Ii&&tn.call(Wt(o)),Uf(["resourceStore","languageUtils","pluralResolver","interpolator","backendConnector","i18nFormat","utils"],r,Wt(o)),o.options=i,o.options.keySeparator===void 0&&(o.options.keySeparator="."),o.logger=yt.create("translator"),o}return dt(n,[{key:"changeLanguage",value:function(o){o&&(this.language=o)}},{key:"exists",value:function(o){var i=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{interpolation:{}};if(o==null)return!1;var s=this.resolve(o,i);return s&&s.res!==void 0}},{key:"extractFromKey",value:function(o,i){var s=i.nsSeparator!==void 0?i.nsSeparator:this.options.nsSeparator;s===void 0&&(s=":");var l=i.keySeparator!==void 0?i.keySeparator:this.options.keySeparator,a=i.ns||this.options.defaultNS||[],u=s&&o.indexOf(s)>-1,f=!this.options.userDefinedKeySeparator&&!i.keySeparator&&!this.options.userDefinedNsSeparator&&!i.nsSeparator&&!Bf(o,s,l);if(u&&!f){var d=o.match(this.interpolator.nestingRegexp);if(d&&d.length>0)return{key:o,namespaces:a};var h=o.split(s);(s!==l||s===l&&this.options.ns.indexOf(h[0])>-1)&&(a=h.shift()),o=h.join(l)}return typeof a=="string"&&(a=[a]),{key:o,namespaces:a}}},{key:"translate",value:function(o,i,s){var l=this;if(fe(i)!=="object"&&this.options.overloadTranslationOptionHandler&&(i=this.options.overloadTranslationOptionHandler(arguments)),i||(i={}),o==null)return"";Array.isArray(o)||(o=[String(o)]);var a=i.returnDetails!==void 0?i.returnDetails:this.options.returnDetails,u=i.keySeparator!==void 0?i.keySeparator:this.options.keySeparator,f=this.extractFromKey(o[o.length-1],i),d=f.key,h=f.namespaces,g=h[h.length-1],v=i.lng||this.language,k=i.appendNamespaceToCIMode||this.options.appendNamespaceToCIMode;if(v&&v.toLowerCase()==="cimode"){if(k){var O=i.nsSeparator||this.options.nsSeparator;return a?{res:"".concat(g).concat(O).concat(d),usedKey:d,exactUsedKey:d,usedLng:v,usedNS:g}:"".concat(g).concat(O).concat(d)}return a?{res:d,usedKey:d,exactUsedKey:d,usedLng:v,usedNS:g}:d}var p=this.resolve(o,i),c=p&&p.res,m=p&&p.usedKey||d,y=p&&p.exactUsedKey||d,S=Object.prototype.toString.apply(c),w=["[object Number]","[object Function]","[object RegExp]"],E=i.joinArrays!==void 0?i.joinArrays:this.options.joinArrays,x=!this.i18nFormat||this.i18nFormat.handleAsObject,V=typeof c!="string"&&typeof c!="boolean"&&typeof c!="number";if(x&&c&&V&&w.indexOf(S)<0&&!(typeof E=="string"&&S==="[object Array]")){if(!i.returnObjects&&!this.options.returnObjects){this.options.returnedObjectHandler||this.logger.warn("accessing an object - but returnObjects options is not enabled!");var P=this.options.returnedObjectHandler?this.options.returnedObjectHandler(m,c,we(we({},i),{},{ns:h})):"key '".concat(d," (").concat(this.language,")' returned an object instead of string.");return a?(p.res=P,p):P}if(u){var K=S==="[object Array]",Ce=K?[]:{},Ot=K?y:m;for(var Ze in c)if(Object.prototype.hasOwnProperty.call(c,Ze)){var Tn="".concat(Ot).concat(u).concat(Ze);Ce[Ze]=this.translate(Tn,we(we({},i),{joinArrays:!1,ns:h})),Ce[Ze]===Tn&&(Ce[Ze]=c[Ze])}c=Ce}}else if(x&&typeof E=="string"&&S==="[object Array]")c=c.join(E),c&&(c=this.extendTranslation(c,o,i,s));else{var ft=!1,et=!1,R=i.count!==void 0&&typeof i.count!="string",b=n.hasDefaultValue(i),F=R?this.pluralResolver.getSuffix(v,i.count,i):"",A=i["defaultValue".concat(F)]||i.defaultValue;!this.isValidLookup(c)&&b&&(ft=!0,c=A),this.isValidLookup(c)||(et=!0,c=d);var q=i.missingKeyNoValueFallbackToKey||this.options.missingKeyNoValueFallbackToKey,Nt=q&&et?void 0:c,Ve=b&&A!==c&&this.options.updateMissing;if(et||ft||Ve){if(this.logger.log(Ve?"updateKey":"missingKey",v,g,d,Ve?A:c),u){var Ln=this.resolve(d,we(we({},i),{},{keySeparator:!1}));Ln&&Ln.res&&this.logger.warn("Seems the loaded translations were in flat JSON format instead of nested. Either set keySeparator: false on init or make sure your translations are published in nested format.")}var Fe=[],Et=this.languageUtils.getFallbackCodes(this.options.fallbackLng,i.lng||this.language);if(this.options.saveMissingTo==="fallback"&&Et&&Et[0])for(var Gi=0;Gi1&&arguments[1]!==void 0?arguments[1]:{},l,a,u,f,d;return typeof o=="string"&&(o=[o]),o.forEach(function(h){if(!i.isValidLookup(l)){var g=i.extractFromKey(h,s),v=g.key;a=v;var k=g.namespaces;i.options.fallbackNS&&(k=k.concat(i.options.fallbackNS));var O=s.count!==void 0&&typeof s.count!="string",p=O&&!s.ordinal&&s.count===0&&i.pluralResolver.shouldUseIntlApi(),c=s.context!==void 0&&(typeof s.context=="string"||typeof s.context=="number")&&s.context!=="",m=s.lngs?s.lngs:i.languageUtils.toResolveHierarchy(s.lng||i.language,s.fallbackLng);k.forEach(function(y){i.isValidLookup(l)||(d=y,!Xa["".concat(m[0],"-").concat(y)]&&i.utils&&i.utils.hasLoadedNamespace&&!i.utils.hasLoadedNamespace(d)&&(Xa["".concat(m[0],"-").concat(y)]=!0,i.logger.warn('key "'.concat(a,'" for languages "').concat(m.join(", "),`" won't get resolved as namespace "`).concat(d,'" was not yet loaded'),"This means something IS WRONG in your setup. You access the t function before i18next.init / i18next.loadNamespace / i18next.changeLanguage was done. Wait for the callback or Promise to resolve before accessing it!!!")),m.forEach(function(S){if(!i.isValidLookup(l)){f=S;var w=[v];if(i.i18nFormat&&i.i18nFormat.addLookupKeys)i.i18nFormat.addLookupKeys(w,v,S,y,s);else{var E;O&&(E=i.pluralResolver.getSuffix(S,s.count,s));var x="".concat(i.options.pluralSeparator,"zero");if(O&&(w.push(v+E),p&&w.push(v+x)),c){var V="".concat(v).concat(i.options.contextSeparator).concat(s.context);w.push(V),O&&(w.push(V+E),p&&w.push(V+x))}}for(var P;P=w.pop();)i.isValidLookup(l)||(u=P,l=i.getResource(S,y,P,s))}}))})}}),{res:l,usedKey:a,exactUsedKey:u,usedLng:f,usedNS:d}}},{key:"isValidLookup",value:function(o){return o!==void 0&&!(!this.options.returnNull&&o===null)&&!(!this.options.returnEmptyString&&o==="")}},{key:"getResource",value:function(o,i,s){var l=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{};return this.i18nFormat&&this.i18nFormat.getResource?this.i18nFormat.getResource(o,i,s,l):this.resourceStore.getResource(o,i,s,l)}}],[{key:"hasDefaultValue",value:function(o){var i="defaultValue";for(var s in o)if(Object.prototype.hasOwnProperty.call(o,s)&&i===s.substring(0,i.length)&&o[s]!==void 0)return!0;return!1}}]),n}(tn);function Ji(e){return e.charAt(0).toUpperCase()+e.slice(1)}var Za=function(){function e(t){ct(this,e),this.options=t,this.supportedLngs=this.options.supportedLngs||!1,this.logger=yt.create("languageUtils")}return dt(e,[{key:"getScriptPartFromCode",value:function(n){if(!n||n.indexOf("-")<0)return null;var r=n.split("-");return r.length===2||(r.pop(),r[r.length-1].toLowerCase()==="x")?null:this.formatLanguageCode(r.join("-"))}},{key:"getLanguagePartFromCode",value:function(n){if(!n||n.indexOf("-")<0)return n;var r=n.split("-");return this.formatLanguageCode(r[0])}},{key:"formatLanguageCode",value:function(n){if(typeof n=="string"&&n.indexOf("-")>-1){var r=["hans","hant","latn","cyrl","cans","mong","arab"],o=n.split("-");return this.options.lowerCaseLng?o=o.map(function(i){return i.toLowerCase()}):o.length===2?(o[0]=o[0].toLowerCase(),o[1]=o[1].toUpperCase(),r.indexOf(o[1].toLowerCase())>-1&&(o[1]=Ji(o[1].toLowerCase()))):o.length===3&&(o[0]=o[0].toLowerCase(),o[1].length===2&&(o[1]=o[1].toUpperCase()),o[0]!=="sgn"&&o[2].length===2&&(o[2]=o[2].toUpperCase()),r.indexOf(o[1].toLowerCase())>-1&&(o[1]=Ji(o[1].toLowerCase())),r.indexOf(o[2].toLowerCase())>-1&&(o[2]=Ji(o[2].toLowerCase()))),o.join("-")}return this.options.cleanCode||this.options.lowerCaseLng?n.toLowerCase():n}},{key:"isSupportedCode",value:function(n){return(this.options.load==="languageOnly"||this.options.nonExplicitSupportedLngs)&&(n=this.getLanguagePartFromCode(n)),!this.supportedLngs||!this.supportedLngs.length||this.supportedLngs.indexOf(n)>-1}},{key:"getBestMatchFromCodes",value:function(n){var r=this;if(!n)return null;var o;return n.forEach(function(i){if(!o){var s=r.formatLanguageCode(i);(!r.options.supportedLngs||r.isSupportedCode(s))&&(o=s)}}),!o&&this.options.supportedLngs&&n.forEach(function(i){if(!o){var s=r.getLanguagePartFromCode(i);if(r.isSupportedCode(s))return o=s;o=r.options.supportedLngs.find(function(l){if(l.indexOf(s)===0)return l})}}),o||(o=this.getFallbackCodes(this.options.fallbackLng)[0]),o}},{key:"getFallbackCodes",value:function(n,r){if(!n)return[];if(typeof n=="function"&&(n=n(r)),typeof n=="string"&&(n=[n]),Object.prototype.toString.apply(n)==="[object Array]")return n;if(!r)return n.default||[];var o=n[r];return o||(o=n[this.getScriptPartFromCode(r)]),o||(o=n[this.formatLanguageCode(r)]),o||(o=n[this.getLanguagePartFromCode(r)]),o||(o=n.default),o||[]}},{key:"toResolveHierarchy",value:function(n,r){var o=this,i=this.getFallbackCodes(r||this.options.fallbackLng||[],n),s=[],l=function(u){u&&(o.isSupportedCode(u)?s.push(u):o.logger.warn("rejecting language code not found in supportedLngs: ".concat(u)))};return typeof n=="string"&&n.indexOf("-")>-1?(this.options.load!=="languageOnly"&&l(this.formatLanguageCode(n)),this.options.load!=="languageOnly"&&this.options.load!=="currentOnly"&&l(this.getScriptPartFromCode(n)),this.options.load!=="currentOnly"&&l(this.getLanguagePartFromCode(n))):typeof n=="string"&&l(this.formatLanguageCode(n)),i.forEach(function(a){s.indexOf(a)<0&&l(o.formatLanguageCode(a))}),s}}]),e}(),Qf=[{lngs:["ach","ak","am","arn","br","fil","gun","ln","mfe","mg","mi","oc","pt","pt-BR","tg","tl","ti","tr","uz","wa"],nr:[1,2],fc:1},{lngs:["af","an","ast","az","bg","bn","ca","da","de","dev","el","en","eo","es","et","eu","fi","fo","fur","fy","gl","gu","ha","hi","hu","hy","ia","it","kk","kn","ku","lb","mai","ml","mn","mr","nah","nap","nb","ne","nl","nn","no","nso","pa","pap","pms","ps","pt-PT","rm","sco","se","si","so","son","sq","sv","sw","ta","te","tk","ur","yo"],nr:[1,2],fc:2},{lngs:["ay","bo","cgg","fa","ht","id","ja","jbo","ka","km","ko","ky","lo","ms","sah","su","th","tt","ug","vi","wo","zh"],nr:[1],fc:3},{lngs:["be","bs","cnr","dz","hr","ru","sr","uk"],nr:[1,2,5],fc:4},{lngs:["ar"],nr:[0,1,2,3,11,100],fc:5},{lngs:["cs","sk"],nr:[1,2,5],fc:6},{lngs:["csb","pl"],nr:[1,2,5],fc:7},{lngs:["cy"],nr:[1,2,3,8],fc:8},{lngs:["fr"],nr:[1,2],fc:9},{lngs:["ga"],nr:[1,2,3,7,11],fc:10},{lngs:["gd"],nr:[1,2,3,20],fc:11},{lngs:["is"],nr:[1,2],fc:12},{lngs:["jv"],nr:[0,1],fc:13},{lngs:["kw"],nr:[1,2,3,4],fc:14},{lngs:["lt"],nr:[1,2,10],fc:15},{lngs:["lv"],nr:[1,2,0],fc:16},{lngs:["mk"],nr:[1,2],fc:17},{lngs:["mnk"],nr:[0,1,2],fc:18},{lngs:["mt"],nr:[1,2,11,20],fc:19},{lngs:["or"],nr:[2,1],fc:2},{lngs:["ro"],nr:[1,2,20],fc:20},{lngs:["sl"],nr:[5,1,2,3],fc:21},{lngs:["he","iw"],nr:[1,2,20,21],fc:22}],qf={1:function(t){return+(t>1)},2:function(t){return+(t!=1)},3:function(t){return 0},4:function(t){return t%10==1&&t%100!=11?0:t%10>=2&&t%10<=4&&(t%100<10||t%100>=20)?1:2},5:function(t){return t==0?0:t==1?1:t==2?2:t%100>=3&&t%100<=10?3:t%100>=11?4:5},6:function(t){return t==1?0:t>=2&&t<=4?1:2},7:function(t){return t==1?0:t%10>=2&&t%10<=4&&(t%100<10||t%100>=20)?1:2},8:function(t){return t==1?0:t==2?1:t!=8&&t!=11?2:3},9:function(t){return+(t>=2)},10:function(t){return t==1?0:t==2?1:t<7?2:t<11?3:4},11:function(t){return t==1||t==11?0:t==2||t==12?1:t>2&&t<20?2:3},12:function(t){return+(t%10!=1||t%100==11)},13:function(t){return+(t!==0)},14:function(t){return t==1?0:t==2?1:t==3?2:3},15:function(t){return t%10==1&&t%100!=11?0:t%10>=2&&(t%100<10||t%100>=20)?1:2},16:function(t){return t%10==1&&t%100!=11?0:t!==0?1:2},17:function(t){return t==1||t%10==1&&t%100!=11?0:1},18:function(t){return t==0?0:t==1?1:2},19:function(t){return t==1?0:t==0||t%100>1&&t%100<11?1:t%100>10&&t%100<20?2:3},20:function(t){return t==1?0:t==0||t%100>0&&t%100<20?1:2},21:function(t){return t%100==1?1:t%100==2?2:t%100==3||t%100==4?3:0},22:function(t){return t==1?0:t==2?1:(t<0||t>10)&&t%10==0?2:3}},Xf=["v1","v2","v3"],eu={zero:0,one:1,two:2,few:3,many:4,other:5};function Jf(){var e={};return Qf.forEach(function(t){t.lngs.forEach(function(n){e[n]={numbers:t.nr,plurals:qf[t.fc]}})}),e}var Zf=function(){function e(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};ct(this,e),this.languageUtils=t,this.options=n,this.logger=yt.create("pluralResolver"),(!this.options.compatibilityJSON||this.options.compatibilityJSON==="v4")&&(typeof Intl>"u"||!Intl.PluralRules)&&(this.options.compatibilityJSON="v3",this.logger.error("Your environment seems not to be Intl API compatible, use an Intl.PluralRules polyfill. Will fallback to the compatibilityJSON v3 format handling.")),this.rules=Jf()}return dt(e,[{key:"addRule",value:function(n,r){this.rules[n]=r}},{key:"getRule",value:function(n){var r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};if(this.shouldUseIntlApi())try{return new Intl.PluralRules(n,{type:r.ordinal?"ordinal":"cardinal"})}catch{return}return this.rules[n]||this.rules[this.languageUtils.getLanguagePartFromCode(n)]}},{key:"needsPlural",value:function(n){var r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},o=this.getRule(n,r);return this.shouldUseIntlApi()?o&&o.resolvedOptions().pluralCategories.length>1:o&&o.numbers.length>1}},{key:"getPluralFormsOfKey",value:function(n,r){var o=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{};return this.getSuffixes(n,o).map(function(i){return"".concat(r).concat(i)})}},{key:"getSuffixes",value:function(n){var r=this,o=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},i=this.getRule(n,o);return i?this.shouldUseIntlApi()?i.resolvedOptions().pluralCategories.sort(function(s,l){return eu[s]-eu[l]}).map(function(s){return"".concat(r.options.prepend).concat(s)}):i.numbers.map(function(s){return r.getSuffix(n,s,o)}):[]}},{key:"getSuffix",value:function(n,r){var o=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{},i=this.getRule(n,o);return i?this.shouldUseIntlApi()?"".concat(this.options.prepend).concat(i.select(r)):this.getSuffixRetroCompatible(i,r):(this.logger.warn("no plural rule found for: ".concat(n)),"")}},{key:"getSuffixRetroCompatible",value:function(n,r){var o=this,i=n.noAbs?n.plurals(r):n.plurals(Math.abs(r)),s=n.numbers[i];this.options.simplifyPluralSuffix&&n.numbers.length===2&&n.numbers[0]===1&&(s===2?s="plural":s===1&&(s=""));var l=function(){return o.options.prepend&&s.toString()?o.options.prepend+s.toString():s.toString()};return this.options.compatibilityJSON==="v1"?s===1?"":typeof s=="number"?"_plural_".concat(s.toString()):l():this.options.compatibilityJSON==="v2"||this.options.simplifyPluralSuffix&&n.numbers.length===2&&n.numbers[0]===1?l():this.options.prepend&&i.toString()?this.options.prepend+i.toString():i.toString()}},{key:"shouldUseIntlApi",value:function(){return!Xf.includes(this.options.compatibilityJSON)}}]),e}();function tu(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),n.push.apply(n,r)}return n}function tt(e){for(var t=1;t0&&arguments[0]!==void 0?arguments[0]:{};ct(this,e),this.logger=yt.create("interpolator"),this.options=t,this.format=t.interpolation&&t.interpolation.format||function(n){return n},this.init(t)}return dt(e,[{key:"init",value:function(){var n=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{};n.interpolation||(n.interpolation={escapeValue:!0});var r=n.interpolation;this.escape=r.escape!==void 0?r.escape:Mf,this.escapeValue=r.escapeValue!==void 0?r.escapeValue:!0,this.useRawValueToEscape=r.useRawValueToEscape!==void 0?r.useRawValueToEscape:!1,this.prefix=r.prefix?bn(r.prefix):r.prefixEscaped||"{{",this.suffix=r.suffix?bn(r.suffix):r.suffixEscaped||"}}",this.formatSeparator=r.formatSeparator?r.formatSeparator:r.formatSeparator||",",this.unescapePrefix=r.unescapeSuffix?"":r.unescapePrefix||"-",this.unescapeSuffix=this.unescapePrefix?"":r.unescapeSuffix||"",this.nestingPrefix=r.nestingPrefix?bn(r.nestingPrefix):r.nestingPrefixEscaped||bn("$t("),this.nestingSuffix=r.nestingSuffix?bn(r.nestingSuffix):r.nestingSuffixEscaped||bn(")"),this.nestingOptionsSeparator=r.nestingOptionsSeparator?r.nestingOptionsSeparator:r.nestingOptionsSeparator||",",this.maxReplaces=r.maxReplaces?r.maxReplaces:1e3,this.alwaysFormat=r.alwaysFormat!==void 0?r.alwaysFormat:!1,this.resetRegExp()}},{key:"reset",value:function(){this.options&&this.init(this.options)}},{key:"resetRegExp",value:function(){var n="".concat(this.prefix,"(.+?)").concat(this.suffix);this.regexp=new RegExp(n,"g");var r="".concat(this.prefix).concat(this.unescapePrefix,"(.+?)").concat(this.unescapeSuffix).concat(this.suffix);this.regexpUnescape=new RegExp(r,"g");var o="".concat(this.nestingPrefix,"(.+?)").concat(this.nestingSuffix);this.nestingRegexp=new RegExp(o,"g")}},{key:"interpolate",value:function(n,r,o,i){var s=this,l,a,u,f=this.options&&this.options.interpolation&&this.options.interpolation.defaultVariables||{};function d(O){return O.replace(/\$/g,"$$$$")}var h=function(p){if(p.indexOf(s.formatSeparator)<0){var c=Ga(r,f,p);return s.alwaysFormat?s.format(c,void 0,o,tt(tt(tt({},i),r),{},{interpolationkey:p})):c}var m=p.split(s.formatSeparator),y=m.shift().trim(),S=m.join(s.formatSeparator).trim();return s.format(Ga(r,f,y),S,o,tt(tt(tt({},i),r),{},{interpolationkey:y}))};this.resetRegExp();var g=i&&i.missingInterpolationHandler||this.options.missingInterpolationHandler,v=i&&i.interpolation&&i.interpolation.skipOnVariables!==void 0?i.interpolation.skipOnVariables:this.options.interpolation.skipOnVariables,k=[{regex:this.regexpUnescape,safeValue:function(p){return d(p)}},{regex:this.regexp,safeValue:function(p){return s.escapeValue?d(s.escape(p)):d(p)}}];return k.forEach(function(O){for(u=0;l=O.regex.exec(n);){var p=l[1].trim();if(a=h(p),a===void 0)if(typeof g=="function"){var c=g(n,l,i);a=typeof c=="string"?c:""}else if(i&&Object.prototype.hasOwnProperty.call(i,p))a="";else if(v){a=l[0];continue}else s.logger.warn("missed to pass in variable ".concat(p," for interpolating ").concat(n)),a="";else typeof a!="string"&&!s.useRawValueToEscape&&(a=Wa(a));var m=O.safeValue(a);if(n=n.replace(l[0],m),v?(O.regex.lastIndex+=a.length,O.regex.lastIndex-=l[0].length):O.regex.lastIndex=0,u++,u>=s.maxReplaces)break}}),n}},{key:"nest",value:function(n,r){var o=this,i=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{},s,l,a;function u(g,v){var k=this.nestingOptionsSeparator;if(g.indexOf(k)<0)return g;var O=g.split(new RegExp("".concat(k,"[ ]*{"))),p="{".concat(O[1]);g=O[0],p=this.interpolate(p,a);var c=p.match(/'/g),m=p.match(/"/g);(c&&c.length%2===0&&!m||m.length%2!==0)&&(p=p.replace(/'/g,'"'));try{a=JSON.parse(p),v&&(a=tt(tt({},v),a))}catch(y){return this.logger.warn("failed parsing options string in nesting for key ".concat(g),y),"".concat(g).concat(k).concat(p)}return delete a.defaultValue,g}for(;s=this.nestingRegexp.exec(n);){var f=[];a=tt({},i),a=a.replace&&typeof a.replace!="string"?a.replace:a,a.applyPostProcessor=!1,delete a.defaultValue;var d=!1;if(s[0].indexOf(this.formatSeparator)!==-1&&!/{.*}/.test(s[1])){var h=s[1].split(this.formatSeparator).map(function(g){return g.trim()});s[1]=h.shift(),f=h,d=!0}if(l=r(u.call(this,s[1].trim(),a),a),l&&s[0]===n&&typeof l!="string")return l;typeof l!="string"&&(l=Wa(l)),l||(this.logger.warn("missed to resolve ".concat(s[1]," for nesting ").concat(n)),l=""),d&&(l=f.reduce(function(g,v){return o.format(g,v,i.lng,tt(tt({},i),{},{interpolationkey:s[1].trim()}))},l.trim())),n=n.replace(s[0],l),this.regexp.lastIndex=0}return n}}]),e}();function nu(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),n.push.apply(n,r)}return n}function xt(e){for(var t=1;t-1){var r=e.split("(");t=r[0].toLowerCase().trim();var o=r[1].substring(0,r[1].length-1);if(t==="currency"&&o.indexOf(":")<0)n.currency||(n.currency=o.trim());else if(t==="relativetime"&&o.indexOf(":")<0)n.range||(n.range=o.trim());else{var i=o.split(";");i.forEach(function(s){if(s){var l=s.split(":"),a=Ff(l),u=a[0],f=a.slice(1),d=f.join(":").trim().replace(/^'+|'+$/g,"");n[u.trim()]||(n[u.trim()]=d),d==="false"&&(n[u.trim()]=!1),d==="true"&&(n[u.trim()]=!0),isNaN(d)||(n[u.trim()]=parseInt(d,10))}})}}return{formatName:t,formatOptions:n}}function Vn(e){var t={};return function(r,o,i){var s=o+JSON.stringify(i),l=t[s];return l||(l=e(o,i),t[s]=l),l(r)}}var nh=function(){function e(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{};ct(this,e),this.logger=yt.create("formatter"),this.options=t,this.formats={number:Vn(function(n,r){var o=new Intl.NumberFormat(n,xt({},r));return function(i){return o.format(i)}}),currency:Vn(function(n,r){var o=new Intl.NumberFormat(n,xt(xt({},r),{},{style:"currency"}));return function(i){return o.format(i)}}),datetime:Vn(function(n,r){var o=new Intl.DateTimeFormat(n,xt({},r));return function(i){return o.format(i)}}),relativetime:Vn(function(n,r){var o=new Intl.RelativeTimeFormat(n,xt({},r));return function(i){return o.format(i,r.range||"day")}}),list:Vn(function(n,r){var o=new Intl.ListFormat(n,xt({},r));return function(i){return o.format(i)}})},this.init(t)}return dt(e,[{key:"init",value:function(n){var r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{interpolation:{}},o=r.interpolation;this.formatSeparator=o.formatSeparator?o.formatSeparator:o.formatSeparator||","}},{key:"add",value:function(n,r){this.formats[n.toLowerCase().trim()]=r}},{key:"addCached",value:function(n,r){this.formats[n.toLowerCase().trim()]=Vn(r)}},{key:"format",value:function(n,r,o){var i=this,s=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{},l=r.split(this.formatSeparator),a=l.reduce(function(u,f){var d=th(f),h=d.formatName,g=d.formatOptions;if(i.formats[h]){var v=u;try{var k=s&&s.formatParams&&s.formatParams[s.interpolationkey]||{},O=k.locale||k.lng||s.locale||s.lng||o;v=i.formats[h](u,O,xt(xt(xt({},g),s),k))}catch(p){i.logger.warn(p)}return v}else i.logger.warn("there was no format function for ".concat(h));return u},n);return a}}]),e}();function ru(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),n.push.apply(n,r)}return n}function ou(e){for(var t=1;t"u"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch{return!1}}function ih(e,t){e.pending[t]!==void 0&&(delete e.pending[t],e.pendingCount--)}var sh=function(e){Li(n,e);var t=rh(n);function n(r,o,i){var s,l=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{};return ct(this,n),s=t.call(this),Ii&&tn.call(Wt(s)),s.backend=r,s.store=o,s.services=i,s.languageUtils=i.languageUtils,s.options=l,s.logger=yt.create("backendConnector"),s.waitingReads=[],s.maxParallelReads=l.maxParallelReads||10,s.readingCalls=0,s.maxRetries=l.maxRetries>=0?l.maxRetries:5,s.retryTimeout=l.retryTimeout>=1?l.retryTimeout:350,s.state={},s.queue=[],s.backend&&s.backend.init&&s.backend.init(i,l.backend,l),s}return dt(n,[{key:"queueLoad",value:function(o,i,s,l){var a=this,u={},f={},d={},h={};return o.forEach(function(g){var v=!0;i.forEach(function(k){var O="".concat(g,"|").concat(k);!s.reload&&a.store.hasResourceBundle(g,k)?a.state[O]=2:a.state[O]<0||(a.state[O]===1?f[O]===void 0&&(f[O]=!0):(a.state[O]=1,v=!1,f[O]===void 0&&(f[O]=!0),u[O]===void 0&&(u[O]=!0),h[k]===void 0&&(h[k]=!0)))}),v||(d[g]=!0)}),(Object.keys(u).length||Object.keys(f).length)&&this.queue.push({pending:f,pendingCount:Object.keys(f).length,loaded:{},errors:[],callback:l}),{toLoad:Object.keys(u),pending:Object.keys(f),toLoadLanguages:Object.keys(d),toLoadNamespaces:Object.keys(h)}}},{key:"loaded",value:function(o,i,s){var l=o.split("|"),a=l[0],u=l[1];i&&this.emit("failedLoading",a,u,i),s&&this.store.addResourceBundle(a,u,s),this.state[o]=i?-1:2;var f={};this.queue.forEach(function(d){Af(d.loaded,[a],u),ih(d,o),i&&d.errors.push(i),d.pendingCount===0&&!d.done&&(Object.keys(d.loaded).forEach(function(h){f[h]||(f[h]={});var g=d.loaded[h];g.length&&g.forEach(function(v){f[h][v]===void 0&&(f[h][v]=!0)})}),d.done=!0,d.errors.length?d.callback(d.errors):d.callback())}),this.emit("loaded",f),this.queue=this.queue.filter(function(d){return!d.done})}},{key:"read",value:function(o,i,s){var l=this,a=arguments.length>3&&arguments[3]!==void 0?arguments[3]:0,u=arguments.length>4&&arguments[4]!==void 0?arguments[4]:this.retryTimeout,f=arguments.length>5?arguments[5]:void 0;if(!o.length)return f(null,{});if(this.readingCalls>=this.maxParallelReads){this.waitingReads.push({lng:o,ns:i,fcName:s,tried:a,wait:u,callback:f});return}this.readingCalls++;var d=function(k,O){if(l.readingCalls--,l.waitingReads.length>0){var p=l.waitingReads.shift();l.read(p.lng,p.ns,p.fcName,p.tried,p.wait,p.callback)}if(k&&O&&a2&&arguments[2]!==void 0?arguments[2]:{},a=arguments.length>3?arguments[3]:void 0;if(!this.backend)return this.logger.warn("No backend was added via i18next.use. Will not load resources."),a&&a();typeof o=="string"&&(o=this.languageUtils.toResolveHierarchy(o)),typeof i=="string"&&(i=[i]);var u=this.queueLoad(o,i,l,a);if(!u.toLoad.length)return u.pending.length||a(),null;u.toLoad.forEach(function(f){s.loadOne(f)})}},{key:"load",value:function(o,i,s){this.prepareLoading(o,i,{},s)}},{key:"reload",value:function(o,i,s){this.prepareLoading(o,i,{reload:!0},s)}},{key:"loadOne",value:function(o){var i=this,s=arguments.length>1&&arguments[1]!==void 0?arguments[1]:"",l=o.split("|"),a=l[0],u=l[1];this.read(a,u,"read",void 0,void 0,function(f,d){f&&i.logger.warn("".concat(s,"loading namespace ").concat(u," for language ").concat(a," failed"),f),!f&&d&&i.logger.log("".concat(s,"loaded namespace ").concat(u," for language ").concat(a),d),i.loaded(o,f,d)})}},{key:"saveMissing",value:function(o,i,s,l,a){var u=arguments.length>5&&arguments[5]!==void 0?arguments[5]:{},f=arguments.length>6&&arguments[6]!==void 0?arguments[6]:function(){};if(this.services.utils&&this.services.utils.hasLoadedNamespace&&!this.services.utils.hasLoadedNamespace(i)){this.logger.warn('did not save key "'.concat(s,'" as the namespace "').concat(i,'" was not yet loaded'),"This means something IS WRONG in your setup. You access the t function before i18next.init / i18next.loadNamespace / i18next.changeLanguage was done. Wait for the callback or Promise to resolve before accessing it!!!");return}if(!(s==null||s==="")){if(this.backend&&this.backend.create){var d=ou(ou({},u),{},{isUpdate:a}),h=this.backend.create.bind(this.backend);if(h.length<6)try{var g;h.length===5?g=h(o,i,s,l,d):g=h(o,i,s,l),g&&typeof g.then=="function"?g.then(function(v){return f(null,v)}).catch(f):f(null,g)}catch(v){f(v)}else h(o,i,s,l,f,d)}!o||!o[0]||this.store.addResource(o[0],i,s,l)}}}]),n}(tn);function iu(){return{debug:!1,initImmediate:!0,ns:["translation"],defaultNS:["translation"],fallbackLng:["dev"],fallbackNS:!1,supportedLngs:!1,nonExplicitSupportedLngs:!1,load:"all",preload:!1,simplifyPluralSuffix:!0,keySeparator:".",nsSeparator:":",pluralSeparator:"_",contextSeparator:"_",partialBundledLanguages:!1,saveMissing:!1,updateMissing:!1,saveMissingTo:"fallback",saveMissingPlurals:!0,missingKeyHandler:!1,missingInterpolationHandler:!1,postProcess:!1,postProcessPassResolved:!1,returnNull:!0,returnEmptyString:!0,returnObjects:!1,joinArrays:!1,returnedObjectHandler:!1,parseMissingKeyHandler:!1,appendNamespaceToMissingKey:!1,appendNamespaceToCIMode:!1,overloadTranslationOptionHandler:function(t){var n={};if(fe(t[1])==="object"&&(n=t[1]),typeof t[1]=="string"&&(n.defaultValue=t[1]),typeof t[2]=="string"&&(n.tDescription=t[2]),fe(t[2])==="object"||fe(t[3])==="object"){var r=t[3]||t[2];Object.keys(r).forEach(function(o){n[o]=r[o]})}return n},interpolation:{escapeValue:!0,format:function(t,n,r,o){return t},prefix:"{{",suffix:"}}",formatSeparator:",",unescapePrefix:"-",nestingPrefix:"$t(",nestingSuffix:")",nestingOptionsSeparator:",",maxReplaces:1e3,skipOnVariables:!0}}}function su(e){return typeof e.ns=="string"&&(e.ns=[e.ns]),typeof e.fallbackLng=="string"&&(e.fallbackLng=[e.fallbackLng]),typeof e.fallbackNS=="string"&&(e.fallbackNS=[e.fallbackNS]),e.supportedLngs&&e.supportedLngs.indexOf("cimode")<0&&(e.supportedLngs=e.supportedLngs.concat(["cimode"])),e}function lu(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),n.push.apply(n,r)}return n}function ht(e){for(var t=1;t"u"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch{return!1}}function go(){}function uh(e){var t=Object.getOwnPropertyNames(Object.getPrototypeOf(e));t.forEach(function(n){typeof e[n]=="function"&&(e[n]=e[n].bind(e))})}var oi=function(e){Li(n,e);var t=lh(n);function n(){var r,o=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},i=arguments.length>1?arguments[1]:void 0;if(ct(this,n),r=t.call(this),Ii&&tn.call(Wt(r)),r.options=su(o),r.services={},r.logger=yt,r.modules={external:[]},uh(Wt(r)),i&&!r.isInitialized&&!o.isClone){if(!r.options.initImmediate)return r.init(o,i),oo(r,Wt(r));setTimeout(function(){r.init(o,i)},0)}return r}return dt(n,[{key:"init",value:function(){var o=this,i=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},s=arguments.length>1?arguments[1]:void 0;typeof i=="function"&&(s=i,i={}),!i.defaultNS&&i.defaultNS!==!1&&i.ns&&(typeof i.ns=="string"?i.defaultNS=i.ns:i.ns.indexOf("translation")<0&&(i.defaultNS=i.ns[0]));var l=iu();this.options=ht(ht(ht({},l),this.options),su(i)),this.options.compatibilityAPI!=="v1"&&(this.options.interpolation=ht(ht({},l.interpolation),this.options.interpolation)),i.keySeparator!==void 0&&(this.options.userDefinedKeySeparator=i.keySeparator),i.nsSeparator!==void 0&&(this.options.userDefinedNsSeparator=i.nsSeparator);function a(p){return p?typeof p=="function"?new p:p:null}if(!this.options.isClone){this.modules.logger?yt.init(a(this.modules.logger),this.options):yt.init(null,this.options);var u;this.modules.formatter?u=this.modules.formatter:typeof Intl<"u"&&(u=nh);var f=new Za(this.options);this.store=new Wf(this.options.resources,this.options);var d=this.services;d.logger=yt,d.resourceStore=this.store,d.languageUtils=f,d.pluralResolver=new Zf(f,{prepend:this.options.pluralSeparator,compatibilityJSON:this.options.compatibilityJSON,simplifyPluralSuffix:this.options.simplifyPluralSuffix}),u&&(!this.options.interpolation.format||this.options.interpolation.format===l.interpolation.format)&&(d.formatter=a(u),d.formatter.init(d,this.options),this.options.interpolation.format=d.formatter.format.bind(d.formatter)),d.interpolator=new eh(this.options),d.utils={hasLoadedNamespace:this.hasLoadedNamespace.bind(this)},d.backendConnector=new sh(a(this.modules.backend),d.resourceStore,d,this.options),d.backendConnector.on("*",function(p){for(var c=arguments.length,m=new Array(c>1?c-1:0),y=1;y1?c-1:0),y=1;y0&&h[0]!=="dev"&&(this.options.lng=h[0])}!this.services.languageDetector&&!this.options.lng&&this.logger.warn("init: no languageDetector is used and no lng is defined");var g=["getResource","hasResourceBundle","getResourceBundle","getDataByLanguage"];g.forEach(function(p){o[p]=function(){var c;return(c=o.store)[p].apply(c,arguments)}});var v=["addResource","addResources","addResourceBundle","removeResourceBundle"];v.forEach(function(p){o[p]=function(){var c;return(c=o.store)[p].apply(c,arguments),o}});var k=hr(),O=function(){var c=function(y,S){o.isInitialized&&!o.initializedStoreOnce&&o.logger.warn("init: i18next is already initialized. You should call init just once!"),o.isInitialized=!0,o.options.isClone||o.logger.log("initialized",o.options),o.emit("initialized",o.options),k.resolve(S),s(y,S)};if(o.languages&&o.options.compatibilityAPI!=="v1"&&!o.isInitialized)return c(null,o.t.bind(o));o.changeLanguage(o.options.lng,c)};return this.options.resources||!this.options.initImmediate?O():setTimeout(O,0),k}},{key:"loadResources",value:function(o){var i=this,s=arguments.length>1&&arguments[1]!==void 0?arguments[1]:go,l=s,a=typeof o=="string"?o:this.language;if(typeof o=="function"&&(l=o),!this.options.resources||this.options.partialBundledLanguages){if(a&&a.toLowerCase()==="cimode")return l();var u=[],f=function(g){if(g){var v=i.services.languageUtils.toResolveHierarchy(g);v.forEach(function(k){u.indexOf(k)<0&&u.push(k)})}};if(a)f(a);else{var d=this.services.languageUtils.getFallbackCodes(this.options.fallbackLng);d.forEach(function(h){return f(h)})}this.options.preload&&this.options.preload.forEach(function(h){return f(h)}),this.services.backendConnector.load(u,this.options.ns,function(h){!h&&!i.resolvedLanguage&&i.language&&i.setResolvedLanguage(i.language),l(h)})}else l(null)}},{key:"reloadResources",value:function(o,i,s){var l=hr();return o||(o=this.languages),i||(i=this.options.ns),s||(s=go),this.services.backendConnector.reload(o,i,function(a){l.resolve(),s(a)}),l}},{key:"use",value:function(o){if(!o)throw new Error("You are passing an undefined module! Please check the object you are passing to i18next.use()");if(!o.type)throw new Error("You are passing a wrong module! Please check the object you are passing to i18next.use()");return o.type==="backend"&&(this.modules.backend=o),(o.type==="logger"||o.log&&o.warn&&o.error)&&(this.modules.logger=o),o.type==="languageDetector"&&(this.modules.languageDetector=o),o.type==="i18nFormat"&&(this.modules.i18nFormat=o),o.type==="postProcessor"&&Uc.addPostProcessor(o),o.type==="formatter"&&(this.modules.formatter=o),o.type==="3rdParty"&&this.modules.external.push(o),this}},{key:"setResolvedLanguage",value:function(o){if(!(!o||!this.languages)&&!(["cimode","dev"].indexOf(o)>-1))for(var i=0;i-1)&&this.store.hasLanguageSomeTranslations(s)){this.resolvedLanguage=s;break}}}},{key:"changeLanguage",value:function(o,i){var s=this;this.isLanguageChangingTo=o;var l=hr();this.emit("languageChanging",o);var a=function(h){s.language=h,s.languages=s.services.languageUtils.toResolveHierarchy(h),s.resolvedLanguage=void 0,s.setResolvedLanguage(h)},u=function(h,g){g?(a(g),s.translator.changeLanguage(g),s.isLanguageChangingTo=void 0,s.emit("languageChanged",g),s.logger.log("languageChanged",g)):s.isLanguageChangingTo=void 0,l.resolve(function(){return s.t.apply(s,arguments)}),i&&i(h,function(){return s.t.apply(s,arguments)})},f=function(h){!o&&!h&&s.services.languageDetector&&(h=[]);var g=typeof h=="string"?h:s.services.languageUtils.getBestMatchFromCodes(h);g&&(s.language||a(g),s.translator.language||s.translator.changeLanguage(g),s.services.languageDetector&&s.services.languageDetector.cacheUserLanguage&&s.services.languageDetector.cacheUserLanguage(g)),s.loadResources(g,function(v){u(v,g)})};return!o&&this.services.languageDetector&&!this.services.languageDetector.async?f(this.services.languageDetector.detect()):!o&&this.services.languageDetector&&this.services.languageDetector.async?this.services.languageDetector.detect.length===0?this.services.languageDetector.detect().then(f):this.services.languageDetector.detect(f):f(o),l}},{key:"getFixedT",value:function(o,i,s){var l=this,a=function u(f,d){var h;if(fe(d)!=="object"){for(var g=arguments.length,v=new Array(g>2?g-2:0),k=2;k1&&arguments[1]!==void 0?arguments[1]:{};if(!this.isInitialized)return this.logger.warn("hasLoadedNamespace: i18next was not initialized",this.languages),!1;if(!this.languages||!this.languages.length)return this.logger.warn("hasLoadedNamespace: i18n.languages were undefined or empty",this.languages),!1;var l=this.resolvedLanguage||this.languages[0],a=this.options?this.options.fallbackLng:!1,u=this.languages[this.languages.length-1];if(l.toLowerCase()==="cimode")return!0;var f=function(g,v){var k=i.services.backendConnector.state["".concat(g,"|").concat(v)];return k===-1||k===2};if(s.precheck){var d=s.precheck(this,f);if(d!==void 0)return d}return!!(this.hasResourceBundle(l,o)||!this.services.backendConnector.backend||this.options.resources&&!this.options.partialBundledLanguages||f(l,o)&&(!a||f(u,o)))}},{key:"loadNamespaces",value:function(o,i){var s=this,l=hr();return this.options.ns?(typeof o=="string"&&(o=[o]),o.forEach(function(a){s.options.ns.indexOf(a)<0&&s.options.ns.push(a)}),this.loadResources(function(a){l.resolve(),i&&i(a)}),l):(i&&i(),Promise.resolve())}},{key:"loadLanguages",value:function(o,i){var s=hr();typeof o=="string"&&(o=[o]);var l=this.options.preload||[],a=o.filter(function(u){return l.indexOf(u)<0});return a.length?(this.options.preload=l.concat(a),this.loadResources(function(u){s.resolve(),i&&i(u)}),s):(i&&i(),Promise.resolve())}},{key:"dir",value:function(o){if(o||(o=this.resolvedLanguage||(this.languages&&this.languages.length>0?this.languages[0]:this.language)),!o)return"rtl";var i=["ar","shu","sqr","ssh","xaa","yhd","yud","aao","abh","abv","acm","acq","acw","acx","acy","adf","ads","aeb","aec","afb","ajp","apc","apd","arb","arq","ars","ary","arz","auz","avl","ayh","ayl","ayn","ayp","bbz","pga","he","iw","ps","pbt","pbu","pst","prp","prd","ug","ur","ydd","yds","yih","ji","yi","hbo","men","xmn","fa","jpr","peo","pes","prs","dv","sam","ckb"],s=this.services&&this.services.languageUtils||new Za(iu());return i.indexOf(s.getLanguagePartFromCode(o))>-1||o.toLowerCase().indexOf("-arab")>1?"rtl":"ltr"}},{key:"cloneInstance",value:function(){var o=this,i=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},s=arguments.length>1&&arguments[1]!==void 0?arguments[1]:go,l=ht(ht(ht({},this.options),i),{isClone:!0}),a=new n(l);(i.debug!==void 0||i.prefix!==void 0)&&(a.logger=a.logger.clone(i));var u=["store","services","language"];return u.forEach(function(f){a[f]=o[f]}),a.services=ht({},this.services),a.services.utils={hasLoadedNamespace:a.hasLoadedNamespace.bind(a)},a.translator=new Ja(a.services,a.options),a.translator.on("*",function(f){for(var d=arguments.length,h=new Array(d>1?d-1:0),g=1;g0&&arguments[0]!==void 0?arguments[0]:{},t=arguments.length>1?arguments[1]:void 0;return new oi(e,t)});var ce=oi.createInstance();ce.createInstance=oi.createInstance;ce.createInstance;ce.dir;ce.init;ce.loadResources;ce.reloadResources;ce.use;ce.changeLanguage;ce.getFixedT;ce.t;ce.exists;ce.setDefaultNamespace;ce.hasLoadedNamespace;ce.loadNamespaces;ce.loadLanguages;function ch(e,t){if(e==null)return{};var n={},r=Object.keys(e),o,i;for(i=0;i=0)&&(n[o]=e[o]);return n}function Al(e,t){if(e==null)return{};var n=ch(e,t),r,o;if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(o=0;o=0)&&Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var dh={area:!0,base:!0,br:!0,col:!0,embed:!0,hr:!0,img:!0,input:!0,link:!0,meta:!0,param:!0,source:!0,track:!0,wbr:!0},ph=/\s([^'"/\s><]+?)[\s/>]|([^\s=]+)=\s?(".*?"|'.*?')/g;function au(e){var t={type:"tag",name:"",voidElement:!1,attrs:{},children:[]},n=e.match(/<\/?([^\s]+?)[/\s>]/);if(n&&(t.name=n[1],(dh[n[1]]||e.charAt(e.length-2)==="/")&&(t.voidElement=!0),t.name.startsWith("!--"))){var r=e.indexOf("-->");return{type:"comment",comment:r!==-1?e.slice(4,r):""}}for(var o=new RegExp(ph),i=null;(i=o.exec(e))!==null;)if(i[0].trim())if(i[1]){var s=i[1].trim(),l=[s,""];s.indexOf("=")>-1&&(l=s.split("=")),t.attrs[l[0]]=l[1],o.lastIndex--}else i[2]&&(t.attrs[i[2]]=i[3].trim().substring(1,i[3].length-1));return t}var fh=/<[a-zA-Z0-9\-\!\/](?:"[^"]*"|'[^']*'|[^'">])*>/g,hh=/^\s*$/,mh=Object.create(null);function Ac(e,t){switch(t.type){case"text":return e+t.content;case"tag":return e+="<"+t.name+(t.attrs?function(n){var r=[];for(var o in n)r.push(o+'="'+n[o]+'"');return r.length?" "+r.join(" "):""}(t.attrs):"")+(t.voidElement?"/>":">"),t.voidElement?e:e+t.children.reduce(Ac,"")+"";case"comment":return e+""}}var gh={parse:function(e,t){t||(t={}),t.components||(t.components=mh);var n,r=[],o=[],i=-1,s=!1;if(e.indexOf("<")!==0){var l=e.indexOf("<");r.push({type:"text",content:l===-1?e:e.substring(0,l)})}return e.replace(fh,function(a,u){if(s){if(a!=="")return;s=!1}var f,d=a.charAt(1)!=="/",h=a.startsWith("");return{type:"comment",comment:r!==-1?e.slice(4,r):""}}for(var o=new RegExp(mh),i=null;(i=o.exec(e))!==null;)if(i[0].trim())if(i[1]){var s=i[1].trim(),l=[s,""];s.indexOf("=")>-1&&(l=s.split("=")),t.attrs[l[0]]=l[1],o.lastIndex--}else i[2]&&(t.attrs[i[2]]=i[3].trim().substring(1,i[3].length-1));return t}var gh=/<[a-zA-Z0-9\-\!\/](?:"[^"]*"|'[^']*'|[^'">])*>/g,vh=/^\s*$/,yh=Object.create(null);function Kc(e,t){switch(t.type){case"text":return e+t.content;case"tag":return e+="<"+t.name+(t.attrs?function(n){var r=[];for(var o in n)r.push(o+'="'+n[o]+'"');return r.length?" "+r.join(" "):""}(t.attrs):"")+(t.voidElement?"/>":">"),t.voidElement?e:e+t.children.reduce(Kc,"")+"";case"comment":return e+""}}var _h={parse:function(e,t){t||(t={}),t.components||(t.components=yh);var n,r=[],o=[],i=-1,s=!1;if(e.indexOf("<")!==0){var l=e.indexOf("<");r.push({type:"text",content:l===-1?e:e.substring(0,l)})}return e.replace(gh,function(a,u){if(s){if(a!=="")return;s=!1}var f,d=a.charAt(1)!=="/",h=a.startsWith(" > **NOTE**: This version is pre-release software and has not undergone full release validation or qualification. No support is offered on pre-release software and APIs/behavior are subject to change. It should NOT be incorporated into any production software/solution and instead should be used only for early testing and integration while awaiting a final release version of this software. +> **NOTE**: OpenVINO™ Development Tools package has been deprecated and will be discontinued with 2024.0 release. To learn more, refer to the [OpenVINO Legacy Features and Components page](https://docs.openvino.ai/2023.1/openvino_legacy_features.html). Intel® Distribution of OpenVINO™ toolkit is an open-source toolkit for optimizing and deploying AI inference. It can be used to develop applications and solutions based on deep learning tasks, such as: emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, etc. It provides high-performance and rich deployment options, from edge to cloud. diff --git a/docs/snippets/AUTO0.cpp b/docs/snippets/AUTO0.cpp index 54d720eb4bcc0d..124bdd900970a7 100644 --- a/docs/snippets/AUTO0.cpp +++ b/docs/snippets/AUTO0.cpp @@ -1,4 +1,6 @@ +//! [py_ov_property_import_header] #include +//! [py_ov_property_import_header] int main() { { diff --git a/docs/snippets/ov_auto.py b/docs/snippets/ov_auto.py index a665b509713f6e..47d8d877ecda24 100644 --- a/docs/snippets/ov_auto.py +++ b/docs/snippets/ov_auto.py @@ -2,10 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 # +#! [py_ov_property_import_header] import openvino as ov import openvino.properties as properties import openvino.properties.device as device import openvino.properties.hint as hints +import openvino.properties.streams as streams +#! [py_ov_property_import_header] import openvino.properties.log as log from openvino.inference_engine import IECore @@ -156,8 +159,23 @@ def part5(): core = ov.Core() # gpu_config and cpu_config will load during compile_model() - compiled_model = core.compile_model(model=model) - compiled_model = core.compile_model(model=model, device_name="AUTO") + gpu_config = { + hints.performance_mode: hints.PerformanceMode.THROUGHPUT, + streams.num: 4 + } + cpu_config = { + hints.performance_mode: hints.PerformanceMode.LATENCY, + streams.num: 8, + properties.enable_profiling: True + } + compiled_model = core.compile_model( + model=model, + device_name="AUTO", + config={ + device.priorities: "GPU,CPU", + device.properties: {'CPU': cpu_config, 'GPU': gpu_config} + } + ) #! [part5] diff --git a/docs/snippets/ov_multi.py b/docs/snippets/ov_multi.py index 1f852faea94c9c..e3ee1aa7bf7158 100644 --- a/docs/snippets/ov_multi.py +++ b/docs/snippets/ov_multi.py @@ -4,6 +4,7 @@ import openvino as ov import openvino.properties as properties import openvino.properties.device as device +import openvino.properties.streams as streams from utils import get_model model = get_model() @@ -96,15 +97,17 @@ def available_devices_2(): def MULTI_4(): #! [MULTI_4] core = ov.Core() - cpu_config = {} - gpu_config = {} + cpu_config = {streams.num : 4} + gpu_config = {streams.num : 8} # When compiling the model on MULTI, configure CPU and GPU # (devices, priorities, and device configurations; gpu_config and cpu_config will load during compile_model() ): compiled_model = core.compile_model( model=model, device_name="MULTI:GPU,CPU", - config={"CPU": "NUM_STREAMS 4", "GPU": "NUM_STREAMS 8"}, + config={ + device.properties: {'CPU': cpu_config, 'GPU': gpu_config} + } ) # Optionally, query the optimal number of requests: diff --git a/docs/snippets/ov_preprocessing.cpp b/docs/snippets/ov_preprocessing.cpp index f559a7a5a1aef4..176953f46691f1 100644 --- a/docs/snippets/ov_preprocessing.cpp +++ b/docs/snippets/ov_preprocessing.cpp @@ -165,7 +165,7 @@ int main() { //! [ov:preprocess:save_headers] void save_example() { - //! [ov:preprocess:save] + //! [ov:preprocess:save_model] // ======== Step 0: read original model ========= ov::Core core; std::shared_ptr model = core.read_model("/path/to/some_model.onnx"); @@ -200,7 +200,7 @@ void save_example() { std::string xml = "/path/to/some_model_saved.xml"; std::string bin = "/path/to/some_model_saved.bin"; ov::serialize(model, xml, bin); - //! [ov:preprocess:save] + //! [ov:preprocess:save_model] } diff --git a/docs/snippets/ov_preprocessing.py b/docs/snippets/ov_preprocessing.py index 23cd30548115ad..8a8f4ce212b4f7 100644 --- a/docs/snippets/ov_preprocessing.py +++ b/docs/snippets/ov_preprocessing.py @@ -184,7 +184,7 @@ def custom_abs(output: Output): model_path = get_path_to_model() serialized_model_path = get_path_to_model() -# ! [ov:preprocess:save] +# ! [ov:preprocess:save_model] # ======== Step 0: read original model ========= core = Core() model = core.read_model(model=model_path) @@ -219,7 +219,7 @@ def custom_abs(output: Output): # ======== Step 3: Save the model ================ serialize(model, serialized_model_path) -# ! [ov:preprocess:save] +# ! [ov:preprocess:save_model] path_to_cache_dir = get_temp_dir() diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index ab5abf8024c045..e2aeebc9c35e7f 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -5,7 +5,8 @@ add_subdirectory(cpp) add_subdirectory(c) -openvino_developer_export_targets(COMPONENT samples TARGETS format_reader ie_samples_utils) +ov_developer_package_export_targets(TARGET format_reader) +ov_developer_package_export_targets(TARGET ie_samples_utils) # # Install diff --git a/samples/cpp/common/format_reader/CMakeLists.txt b/samples/cpp/common/format_reader/CMakeLists.txt index 89732ca039a363..7be5f6af757501 100644 --- a/samples/cpp/common/format_reader/CMakeLists.txt +++ b/samples/cpp/common/format_reader/CMakeLists.txt @@ -30,7 +30,7 @@ else() target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV) endif() -target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" +target_include_directories(${TARGET_NAME} PUBLIC "$" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") set_target_properties(${TARGET_NAME} PROPERTIES FOLDER cpp_samples) diff --git a/samples/cpp/common/utils/CMakeLists.txt b/samples/cpp/common/utils/CMakeLists.txt index 108818b94c0c23..f7dd66d67b58fd 100644 --- a/samples/cpp/common/utils/CMakeLists.txt +++ b/samples/cpp/common/utils/CMakeLists.txt @@ -11,7 +11,7 @@ add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${SOURCES}) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER "src") target_include_directories(${TARGET_NAME} - PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") + PUBLIC "$") find_package(OpenVINO REQUIRED COMPONENTS Runtime) diff --git a/src/bindings/c/src/CMakeLists.txt b/src/bindings/c/src/CMakeLists.txt index 8f5eaeac581735..a6a649ada6e990 100644 --- a/src/bindings/c/src/CMakeLists.txt +++ b/src/bindings/c/src/CMakeLists.txt @@ -46,6 +46,7 @@ export(TARGETS ${TARGET_NAME} NAMESPACE openvino:: APPEND FILE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") # install + ov_cpack_add_component(${OV_CPACK_COMP_CORE_C} HIDDEN) ov_cpack_add_component(${OV_CPACK_COMP_CORE_C_DEV} HIDDEN) diff --git a/src/bindings/python/CMakeLists.txt b/src/bindings/python/CMakeLists.txt index 8a572f89a0f47e..a2e8945a807776 100644 --- a/src/bindings/python/CMakeLists.txt +++ b/src/bindings/python/CMakeLists.txt @@ -20,7 +20,20 @@ project(OpenVINOPython DESCRIPTION "OpenVINO Runtime Python bindings") if(NOT DEFINED OpenVINO_SOURCE_DIR) find_package(OpenVINODeveloperPackage REQUIRED PATHS "${InferenceEngineDeveloperPackage_DIR}") + + # we assume that OpenVINODeveloperPackage is generated in OpenVINO build tree set(OpenVINO_BINARY_DIR "${OpenVINODeveloperPackage_DIR}") + # but this can be invalid for cases of OpenVINODeveloperPackage relocatable installation + # so, we need to disable wheen generation for this case + if(NOT EXISTS "${OpenVINO_BINARY_DIR}/cmake_install.cmake") + set(OpenVINODeveloperPackage_RELOCATABLE ON) + endif() + + set(OpenVINO_SOURCE_DIR "${OpenVINOPython_SOURCE_DIR}/../../../") +endif() + +if(NOT DEFINED OpenVINODeveloperPackage_RELOCATABLE) + set(OpenVINODeveloperPackage_RELOCATABLE OFF) endif() # @@ -141,11 +154,10 @@ function(ov_check_init_files_alignment init_files) endforeach() endfunction() -set(INIT_FILES_RUNTIME -"${OpenVINOPython_SOURCE_DIR}/src/openvino/__init__.py" -"${OpenVINOPython_SOURCE_DIR}/src/compatibility/openvino/__init__.py" -"${OpenVINO_SOURCE_DIR}/tools/ovc/openvino/__init__.py" -"${OpenVINO_SOURCE_DIR}/tools/benchmark_tool/openvino/__init__.py") +set(INIT_FILES_RUNTIME "${OpenVINOPython_SOURCE_DIR}/src/openvino/__init__.py" + "${OpenVINOPython_SOURCE_DIR}/src/compatibility/openvino/__init__.py" + "${OpenVINO_SOURCE_DIR}/tools/ovc/openvino/__init__.py" + "${OpenVINO_SOURCE_DIR}/tools/benchmark_tool/openvino/__init__.py") ov_check_init_files_alignment("${INIT_FILES_RUNTIME}") @@ -193,7 +205,7 @@ endif() # this option should not be a part of OpenVINODeveloperPackage # since wheels can be built only together with main OV build -ov_dependent_option(ENABLE_WHEEL "Build wheel packages for PyPI" ${ENABLE_WHEEL_DEFAULT} "ENABLE_PYTHON" OFF) +ov_dependent_option(ENABLE_WHEEL "Build wheel packages for PyPI" ${ENABLE_WHEEL_DEFAULT} "ENABLE_PYTHON;NOT OpenVINODeveloperPackage_RELOCATABLE" OFF) if(NOT ENABLE_PYTHON) if(CMAKE_SOURCE_DIR STREQUAL OpenVINOPython_SOURCE_DIR) diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 9db99017681f4f..20e888bde84984 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -5,7 +5,7 @@ numpy>=1.16.6,<1.27 # Python bindings, frontends pytest>=5.0,<7.5 pytest-dependency==0.5.1 pytest-html==3.2.0 -pytest-timeout==2.1.0 +pytest-timeout==2.2.0 # Python bindings py>=1.9.0 diff --git a/src/bindings/python/src/compatibility/openvino/cmake/CythonConfig.cmake b/src/bindings/python/src/compatibility/openvino/cmake/CythonConfig.cmake index 8d02cf9890a5be..8eeabf849f49c5 100644 --- a/src/bindings/python/src/compatibility/openvino/cmake/CythonConfig.cmake +++ b/src/bindings/python/src/compatibility/openvino/cmake/CythonConfig.cmake @@ -36,9 +36,12 @@ function( _find_cython_executable ) get_filename_component( _python_path ${Python3_EXECUTABLE} PATH ) file(TO_CMAKE_PATH "$ENV{HOME}" ENV_HOME) find_host_program( CYTHON_EXECUTABLE - NAMES cython cython.bat cython3 - HINTS ${_python_path} ${ENV_HOME}/.local/bin $ENV{HOMEBREW_OPT}/cython/bin + NAMES cython cython.exe cython.bat cython3 + HINTS ${_python_path} + ${ENV_HOME}/.local/bin + $ENV{HOMEBREW_OPT}/cython/bin ${ENV_HOME}/Library/Python/${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/bin + ${_python_path}/Scripts ) else() find_host_program( CYTHON_EXECUTABLE diff --git a/src/bindings/python/src/compatibility/pyngraph/CMakeLists.txt b/src/bindings/python/src/compatibility/pyngraph/CMakeLists.txt index ba20fd76055cac..8b68d5dde8a5e9 100644 --- a/src/bindings/python/src/compatibility/pyngraph/CMakeLists.txt +++ b/src/bindings/python/src/compatibility/pyngraph/CMakeLists.txt @@ -7,27 +7,25 @@ cmake_minimum_required (VERSION 3.13) project (pyngraph) if(NOT DEFINED OpenVINO_SOURCE_DIR) + find_package(OpenVINO REQUIRED) find_package(OpenVINODeveloperPackage QUIET PATHS "${InferenceEngineDeveloperPackage_DIR}") - find_package(OpenVINO REQUIRED) endif() # Python3_VERSION_MAJOR and Python3_VERSION_MINOR are defined in FindPython3 set(pyversion python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}) -if(OpenVINO_SOURCE_DIR) - if(OV_GENERATOR_MULTI_CONFIG) - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python/) - else() - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python/) - endif() - - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) - set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) - set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +if(OV_GENERATOR_MULTI_CONFIG) + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python/) +else() + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python/) endif() +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) + # compile options if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") @@ -54,43 +52,39 @@ file(GLOB_RECURSE SOURCES *.cpp) pybind11_add_module(_${PROJECT_NAME} MODULE NO_EXTRAS ${SOURCES}) -target_include_directories(_${PROJECT_NAME} PRIVATE "../" "${OpenVINO_SOURCE_DIR}/src/common/transformations/include") +target_include_directories(_${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../") -target_link_libraries(_${PROJECT_NAME} PRIVATE openvino::runtime) +target_link_libraries(_${PROJECT_NAME} PRIVATE openvino::runtime openvino::core::dev) set_target_properties(_${PROJECT_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) # perform copy -if(OpenVINO_SOURCE_DIR) - add_custom_command(TARGET _${PROJECT_NAME} - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/../ngraph ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/ngraph - ) -endif() +add_custom_command(TARGET _${PROJECT_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/../ngraph ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/ngraph +) ov_set_apple_rpath(_${PROJECT_NAME} ${OV_CPACK_PYTHONDIR} ${OV_CPACK_RUNTIMEDIR}) # Install -if(OpenVINO_SOURCE_DIR OR OpenVINODeveloperPackage_FOUND) - ov_python_minimal_api(_${PROJECT_NAME}) - ov_add_clang_format_target(_${PROJECT_NAME}_clang FOR_TARGETS _${PROJECT_NAME}) +ov_python_minimal_api(_${PROJECT_NAME}) +ov_add_clang_format_target(_${PROJECT_NAME}_clang FOR_TARGETS _${PROJECT_NAME}) - ov_cpack_add_component(${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} HIDDEN) +ov_cpack_add_component(${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} HIDDEN) - install(TARGETS _${PROJECT_NAME} - DESTINATION ${OV_CPACK_PYTHONDIR} - COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} - ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) +install(TARGETS _${PROJECT_NAME} + DESTINATION ${OV_CPACK_PYTHONDIR} + COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} + ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) - install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../ngraph - DESTINATION ${OV_CPACK_PYTHONDIR} - COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} - ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL} - USE_SOURCE_PERMISSIONS) +install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../ngraph + DESTINATION ${OV_CPACK_PYTHONDIR} + COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} + ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL} + USE_SOURCE_PERMISSIONS) - install(DIRECTORY ${OpenVINOPython_SOURCE_DIR}/tests_compatibility - DESTINATION tests/${PROJECT_NAME} - COMPONENT tests - EXCLUDE_FROM_ALL) -endif() +install(DIRECTORY ${OpenVINOPython_SOURCE_DIR}/tests_compatibility + DESTINATION tests/${PROJECT_NAME} + COMPONENT tests + EXCLUDE_FROM_ALL) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 45a662e4e45fd1..a79892b3e4d6f5 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -370,7 +370,7 @@ def inlined_inputs(self, index): return result def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::matmul"]: # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that return False try: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py new file mode 100644 index 00000000000000..b4bd06552b2a1e --- /dev/null +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -0,0 +1,140 @@ + +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# flake8: noqa +# mypy: ignore-errors + +import torch +from functools import partial + +# Wraps a single tensor to a module to prevent it from jit.freezing +# It depends on a tensor dtype whether it will be preserved from freezing. Refer to the decoder code to learn which types will be preserved. +class KeepWeight(torch.nn.Module): + + def __init__(self, weight): + super().__init__() + self.weight = torch.nn.Parameter(weight, requires_grad=False) + + def forward(self): + return self.weight + + +# Produces a pattern that can be captured later and represented as a single u4 constant node +def decompression_pattern(weights): + mask = torch.tensor(15, dtype=torch.uint8).to(weights.device) + return torch.stack((torch.bitwise_and(weights, mask), torch.bitwise_right_shift(weights, 4)), dim=-1) + + +def patched_forward(self, *args, **kwargs): + if hasattr(self, '_hf_hook'): + args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs) + + x = args[0] + dtype = x.dtype + outshape = x.shape[:-1] + (self.width,) + x = x.view(-1, x.shape[-1]) + groups = self.qzeros.shape[0] + height = self.qweight.shape[0] + + unpacked_weights = decompression_pattern( + self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8) + unpacked_weights = torch.transpose( + unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width) + unpacked_zp = decompression_pattern( + self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1) + + unpacked_zp = unpacked_zp.to(dtype) + 1 + + unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales + unpacked_weights = unpacked_weights.view(-1, self.width) + + out = x @ unpacked_weights + + out = out.view(outshape) + if self.bias is not None: + out.add_(self.bias) + + if hasattr(self, '_hf_hook'): + out = self._hf_hook.post_forward(self, out) + return out + + +# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema +supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old'] + + +def patch_model(model): + for name, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + # already patched, skipping + continue + # TODO: Check module type + is_quantized = getattr(m, 'is_quantized', None) + if is_quantized is not None: + m.is_quantized = False + m.float() # enables tracing on CPU, applied for all modules + if hasattr(m, 'QUANT_TYPE'): + if m.QUANT_TYPE not in supported_quant_types: + raise ValueError( + f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}') + if m.bits != 4: + raise ValueError( + f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.') + + int4_in_int32 = 8 + groups = m.qzeros.shape[0] + m.width = m.qweight.shape[1] + assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups + + m._openvino_patch_orig_forward = m.forward + m.forward = partial(patched_forward, m) + + # Keep original field properties to be used when model is returned back to its original state + m._openvino_patch_orig_qweights_type = m.qweight.dtype + m._openvino_patch_orig_qzeros_type = m.qzeros.dtype + m._openvino_patch_orig_scale_shape = m.scales.shape + + m.qweight = m.qweight.view(dtype=torch.uint8) + m.qzeros = m.qzeros.view(dtype=torch.uint8) + + # TODO: Redundant tensor copy? Try to remove m.qweigh and m.qzeros after keeping modified values as submodules + m.add_module( + '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight)) + m.add_module('_openvino_u4_compression_submodule_qzeros', + KeepWeight(m.qzeros)) + + m.scales = m.scales.view(-1, 1, m.width) + + +def unpatch_model(model): + for _, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + try: + m.forward = m._openvino_patch_orig_forward + del m._openvino_patch_orig_forward + + m.qweight = m.qweight.view( + dtype=m._openvino_patch_orig_qweights_type) + del m._openvino_patch_orig_qweights_type + + m.qzeros = m.qzeros.view( + dtype=m._openvino_patch_orig_qzeros_type) + del m._openvino_patch_orig_qzeros_type + + m.scales = m.scales.view(m._openvino_patch_orig_scale_shape) + del m._openvino_patch_orig_scale_shape + + del m._openvino_u4_compression_submodule_qweights + del m._openvino_u4_compression_submodule_qzeros + except Exception as error: + print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to broken original model') + print(error) + + +def detect_gptq_model_raw(model): + return model and getattr(model, 'config', None) and getattr(model.config, 'quantization_config', None) and model.config.quantization_config.quant_method == 'gptq' + + +def detect_gptq_model(model): + return detect_gptq_model_raw(model) or getattr(model, 'model', None) and detect_gptq_model_raw(model.model) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index b6caf22cfc7b68..11d5991e700c42 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -9,6 +9,7 @@ from openvino.runtime import op, PartialShape, Type as OVType, OVAny from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, prepare_example_inputs_and_model, convert_quantized_tensor from openvino.runtime import opset11 as ops +from openvino.frontend.pytorch import gptq import typing import torch @@ -84,8 +85,27 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False) if example_inputs is None: scripted = torch.jit.script(pt_module) else: - input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model(example_inputs, input_params, pt_module) - scripted = torch.jit.trace(pt_module, **input_parameters, strict=False) + input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model( + example_inputs, input_params, pt_module) + gptq_patched = False + + if gptq.detect_gptq_model(pt_module): + try: + gptq.patch_model(pt_module) + gptq_patched = True + except Exception as error: + print('[ WARNING ] Failed patching of AutoGPTQ model. Error message:\n', error) + print('[ WARNING ] Tracing of the model will likely be unsuccesfull or incorrect') + gptq.unpatch_model(pt_module) + gptq_patched = False + + try: + scripted = torch.jit.trace( + pt_module, **input_parameters, strict=False) + finally: + if gptq_patched: + gptq.unpatch_model(pt_module) + if not skip_freeze: for n in scripted.inlined_graph.nodes(): # TODO: switch off freezing for all traced models @@ -341,7 +361,7 @@ def input_is_none(self, index: int) -> bool: return False def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::matmul"]: # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that return False try: diff --git a/src/bindings/python/src/openvino/runtime/opset13/__init__.py b/src/bindings/python/src/openvino/runtime/opset13/__init__.py index 4ea991bf77b9ec..9cdb7149569ebb 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset13/__init__.py @@ -106,6 +106,7 @@ from openvino.runtime.opset4.ops import mish from openvino.runtime.opset1.ops import mod from openvino.runtime.opset9.ops import multiclass_nms +from openvino.runtime.opset13.ops import multinomial from openvino.runtime.opset1.ops import multiply from openvino.runtime.opset6.ops import mvn from openvino.runtime.opset1.ops import negative diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/runtime/opset13/ops.py index f864d7fccca0ea..fff95b33d234d6 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset13/ops.py @@ -110,6 +110,47 @@ def bitwise_xor( ) +@nameable_op +def multinomial( + probs: NodeInput, + num_samples: NodeInput, + convert_type: str, + with_replacement: bool, + log_probs: bool, + global_seed: int = 0, + op_seed: int = 0, +) -> Node: + """Return a node which generates a sequence of class indices sampled from the multinomial distribution. + + :param probs: Tensor with probabilities of floating-point type, and shape [class_size] or [batch_size, class_size]. + :param num_samples: Tensor (scalar or 1D) a single element of type i32 or i64, + specifying the number of samples to draw from the multinomial distribution. + :param convert_type: Specifies the output tensor type, possible values: 'i64', 'i32'. + :param with_replacement: Flag that specifies whether to sample with replacement. + :param log_probs: Flag that specifies whether *probs* should be treated as unnormalized log probabilities. + :param global_seed: Specifies global seed value. Required to be a positive integer or 0. + :param op_seed: Specifies operational seed value. Required to be a positive integer or 0. + + :return: The new node performing Multinomial operation. + """ + inputs = as_nodes(probs, num_samples) + + if global_seed < 0: + raise RuntimeError(f"global_seed should be positive or 0. Got: {global_seed}") + + if op_seed < 0: + raise RuntimeError(f"op_seed should be positive or 0. Got: {op_seed}") + + attributes = { + "convert_type": convert_type, + "with_replacement": with_replacement, + "log_probs": log_probs, + "global_seed": global_seed, + "op_seed": op_seed, + } + return _get_node_factory_opset13().create("Multinomial", inputs, attributes) + + @nameable_op def nms_rotated( boxes: NodeInput, diff --git a/src/bindings/python/src/pyopenvino/CMakeLists.txt b/src/bindings/python/src/pyopenvino/CMakeLists.txt index 99ae9983ee82c5..5566c961d1a57b 100644 --- a/src/bindings/python/src/pyopenvino/CMakeLists.txt +++ b/src/bindings/python/src/pyopenvino/CMakeLists.txt @@ -10,19 +10,17 @@ endif() # Python3_VERSION_MAJOR and Python3_VERSION_MINOR are defined by FindPython3 set(pyversion python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}) -if(OpenVINO_SOURCE_DIR) - if(OV_GENERATOR_MULTI_CONFIG) - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python/openvino) - else() - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python/openvino) - endif() - - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) - set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) - set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +if(OV_GENERATOR_MULTI_CONFIG) + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python/openvino) +else() + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python/openvino) endif() +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) +set(CMAKE_PDB_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}) + # compile options if(OV_COMPILER_IS_APPLECLANG) @@ -64,15 +62,8 @@ list(FILTER SOURCES EXCLUDE REGEX ".*(frontend/(onnx|tensorflow|paddle|pytorch)) pybind11_add_module(${PROJECT_NAME} MODULE NO_EXTRAS ${SOURCES}) -if(TARGET offline_transformations) - set(OFFLINE_TRANSFORMATIONS_LIB offline_transformations) -else() - set(OFFLINE_TRANSFORMATIONS_LIB openvino::offline_transformations) -endif() - target_include_directories(${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") -target_link_libraries(${PROJECT_NAME} PRIVATE - openvino::core::dev openvino::runtime ${OFFLINE_TRANSFORMATIONS_LIB}) +target_link_libraries(${PROJECT_NAME} PRIVATE openvino::core::dev openvino::runtime openvino::offline_transformations) set_target_properties(${PROJECT_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO} OUTPUT_NAME "_pyopenvino") @@ -96,58 +87,56 @@ if(OV_GENERATOR_MULTI_CONFIG) endif() # perform copy -if(OpenVINO_SOURCE_DIR) - add_custom_command(TARGET ${PROJECT_NAME} - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory ${OpenVINOPython_SOURCE_DIR}/src/openvino ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} - COMMAND ${CMAKE_COMMAND} -E copy ${OpenVINOPython_SOURCE_DIR}/requirements.txt ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../requirements.txt - ) -endif() - -if(OpenVINO_SOURCE_DIR OR OpenVINODeveloperPackage_FOUND) - ov_python_minimal_api(${PROJECT_NAME}) - ov_add_clang_format_target(${PROJECT_NAME}_clang FOR_TARGETS ${PROJECT_NAME}) - - ov_cpack_add_component(${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} - HIDDEN) - - install(DIRECTORY ${OpenVINOPython_SOURCE_DIR}/src/openvino - DESTINATION ${OV_CPACK_PYTHONDIR} - COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} - ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL} - USE_SOURCE_PERMISSIONS - PATTERN "test_utils" EXCLUDE - PATTERN "torchvision/requirements.txt" EXCLUDE) - - install(TARGETS ${PROJECT_NAME} - DESTINATION ${OV_CPACK_PYTHONDIR}/openvino - COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} - ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) - - ov_set_apple_rpath(${PROJECT_NAME} ${OV_CPACK_PYTHONDIR}/openvino - # path to OpenVINO C++ libraries - ${OV_CPACK_RUNTIMEDIR} - # pyopenvino also depends on TBB because of: - # pyopenvino => openvino::offline_transformations => TBB optimized openvino::reference - ${TBB_LIB_INSTALL_DIR}) - - ov_cpack_add_component(${OV_CPACK_COMP_OPENVINO_REQ_FILES} HIDDEN) - - install(FILES ${OpenVINOPython_SOURCE_DIR}/requirements.txt - DESTINATION ${OV_CPACK_PYTHONDIR} - COMPONENT ${OV_CPACK_COMP_OPENVINO_REQ_FILES} - ${OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL}) - - install(FILES ${OpenVINOPython_SOURCE_DIR}/src/openvino/preprocess/torchvision/requirements.txt - DESTINATION ${OV_CPACK_PYTHONDIR}/openvino/preprocess/torchvision - COMPONENT ${OV_CPACK_COMP_OPENVINO_REQ_FILES} - ${OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL}) - - install(DIRECTORY ${OpenVINOPython_SOURCE_DIR}/tests - DESTINATION tests/${PROJECT_NAME} - COMPONENT tests - EXCLUDE_FROM_ALL) -endif() +add_custom_command(TARGET ${PROJECT_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${OpenVINOPython_SOURCE_DIR}/src/openvino ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} + COMMAND ${CMAKE_COMMAND} -E copy ${OpenVINOPython_SOURCE_DIR}/requirements.txt ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../requirements.txt + ) + +ov_python_minimal_api(${PROJECT_NAME}) +ov_add_clang_format_target(${PROJECT_NAME}_clang FOR_TARGETS ${PROJECT_NAME}) + +# install steps + +ov_cpack_add_component(${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} + HIDDEN) + +install(DIRECTORY ${OpenVINOPython_SOURCE_DIR}/src/openvino + DESTINATION ${OV_CPACK_PYTHONDIR} + COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} + ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL} + USE_SOURCE_PERMISSIONS + PATTERN "test_utils" EXCLUDE + PATTERN "torchvision/requirements.txt" EXCLUDE) + +install(TARGETS ${PROJECT_NAME} + DESTINATION ${OV_CPACK_PYTHONDIR}/openvino + COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO}_${pyversion} + ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) + +ov_set_apple_rpath(${PROJECT_NAME} ${OV_CPACK_PYTHONDIR}/openvino + # path to OpenVINO C++ libraries + ${OV_CPACK_RUNTIMEDIR} + # pyopenvino also depends on TBB because of: + # pyopenvino => openvino::offline_transformations => TBB optimized openvino::reference + ${TBB_LIB_INSTALL_DIR}) + +ov_cpack_add_component(${OV_CPACK_COMP_OPENVINO_REQ_FILES} HIDDEN) + +install(FILES ${OpenVINOPython_SOURCE_DIR}/requirements.txt + DESTINATION ${OV_CPACK_PYTHONDIR} + COMPONENT ${OV_CPACK_COMP_OPENVINO_REQ_FILES} + ${OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL}) + +install(FILES ${OpenVINOPython_SOURCE_DIR}/src/openvino/preprocess/torchvision/requirements.txt + DESTINATION ${OV_CPACK_PYTHONDIR}/openvino/preprocess/torchvision + COMPONENT ${OV_CPACK_COMP_OPENVINO_REQ_FILES} + ${OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL}) + +install(DIRECTORY ${OpenVINOPython_SOURCE_DIR}/tests + DESTINATION tests/${PROJECT_NAME} + COMPONENT tests + EXCLUDE_FROM_ALL) if(TARGET ie_wheel) add_dependencies(ie_wheel ${PROJECT_NAME}) diff --git a/src/bindings/python/tests/test_graph/test_multinomial.py b/src/bindings/python/tests/test_graph/test_multinomial.py new file mode 100644 index 00000000000000..a1275837cc39d8 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_multinomial.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +import openvino.runtime.opset13 as ops +from openvino.runtime import PartialShape, Dimension, Type + + +@pytest.mark.parametrize( + ("probs_shape", "num_samples_shape", "convert_type", "with_replacement", "log_probs", "global_seed", "op_seed", "expected_out_shape"), + [ + ([4, 16], [], "i32", False, True, 7461, 1546, PartialShape([4, -1])), + ([8], [1], "i64", True, False, 0, 0, PartialShape([-1])), + ], +) +def test_multinomial_param_inputs(probs_shape, num_samples_shape, convert_type, with_replacement, log_probs, global_seed, op_seed, expected_out_shape): + probs = ops.parameter(probs_shape, dtype=np.float32) + num_samples = ops.parameter(num_samples_shape, dtype=np.int32) + + op = ops.multinomial(probs, num_samples, + convert_type=convert_type, + with_replacement=with_replacement, + log_probs=log_probs, + global_seed=global_seed, + op_seed=op_seed) + assert op.get_output_size() == 1 + assert op.get_type_name() == "Multinomial" + assert op.get_output_element_type(0) == Type.i32 if convert_type == "i32" else Type.i64 + assert op.get_output_partial_shape(0) == expected_out_shape + + +@pytest.mark.parametrize( + ("probs_array", "num_samples_val", "convert_type", "with_replacement", "log_probs", "global_seed", "op_seed", "expected_out_shape"), + [ + (np.array([0.7, 0.3, 0.6, 0.5]), 3, "i32", False, True, 111, 222, PartialShape([3])), + (np.array([[0.7, 0.3], [0.6, 0.5]]), 2, "i64", True, False, 111, 222, PartialShape([2, 2])), + ], +) +def test_multinomial_const_inputs(probs_array, num_samples_val, convert_type, with_replacement, log_probs, global_seed, op_seed, expected_out_shape): + probs = ops.constant(probs_array, dtype=np.float32) + num_samples = ops.constant(num_samples_val, dtype=np.int32) + + op = ops.multinomial(probs, num_samples, + convert_type=convert_type, + with_replacement=with_replacement, + log_probs=log_probs, + global_seed=global_seed, + op_seed=op_seed) + + assert op.get_output_size() == 1 + assert op.get_type_name() == "Multinomial" + assert op.get_output_element_type(0) == Type.i32 if convert_type == "i32" else Type.i64 + assert op.get_output_partial_shape(0) == expected_out_shape + + +@pytest.mark.parametrize( + ("probs_shape", "num_samples_shape", "convert_type", "with_replacement", "log_probs", "expected_out_shape"), + [ + ([10], [1], "i32", True, True, PartialShape([-1])), + ([2, 16], [], "i64", False, False, PartialShape([2, -1])), + ], +) +def test_multinomial_default_attrs(probs_shape, num_samples_shape, convert_type, with_replacement, log_probs, expected_out_shape): + probs = ops.parameter(probs_shape, dtype=np.float32) + num_samples = ops.parameter(num_samples_shape, dtype=np.int32) + + op = ops.multinomial(probs, num_samples, + convert_type=convert_type, + with_replacement=with_replacement, + log_probs=log_probs) + + assert op.get_output_size() == 1 + assert op.get_type_name() == "Multinomial" + assert op.get_output_element_type(0) == Type.i32 if convert_type == "i32" else Type.i64 + assert op.get_output_partial_shape(0) == expected_out_shape diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index ba3786cd697a75..7fb6e2fd77bd6b 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -107,27 +107,27 @@ install(TARGETS ${TARGET_NAME} EXPORT OpenVINOTargets # Add openvino::runtine::dev target # -add_library(${TARGET_NAME}_dev INTERFACE) -add_library(openvino::runtime::dev ALIAS ${TARGET_NAME}_dev) +add_library(openvino_runtime_dev INTERFACE) +add_library(openvino::runtime::dev ALIAS openvino_runtime_dev) -target_include_directories(${TARGET_NAME}_dev INTERFACE +target_include_directories(openvino_runtime_dev INTERFACE $ $ - $) + $>) -target_compile_definitions(${TARGET_NAME}_dev INTERFACE +target_compile_definitions(openvino_runtime_dev INTERFACE $) -target_link_libraries(${TARGET_NAME}_dev INTERFACE ${TARGET_NAME} openvino::core::dev) +target_link_libraries(openvino_runtime_dev INTERFACE ${TARGET_NAME} openvino::core::dev) -# TODO: remove once NPU will use explicltly `ov_set_threading_interface_for` -ov_set_threading_interface_for(${TARGET_NAME}_dev) -set_target_properties(${TARGET_NAME}_dev PROPERTIES EXPORT_NAME runtime::dev) +ov_set_threading_interface_for(openvino_runtime_dev) +set_target_properties(openvino_runtime_dev PROPERTIES EXPORT_NAME runtime::dev) -openvino_developer_export_targets(COMPONENT core TARGETS openvino::runtime::dev) +ov_developer_package_export_targets(TARGET openvino::runtime::dev + INSTALL_INCLUDE_DIRECTORIES "${OpenVINO_SOURCE_DIR}/src/inference/dev_api/") # Install static libraries for case BUILD_SHARED_LIBS=OFF -ov_install_static_lib(${TARGET_NAME}_dev ${OV_CPACK_COMP_CORE}) +ov_install_static_lib(openvino_runtime_dev ${OV_CPACK_COMP_CORE}) # # Install OpenVINO runtime diff --git a/src/common/conditional_compilation/CMakeLists.txt b/src/common/conditional_compilation/CMakeLists.txt index 876558cea5e474..8f5cd90fe22d21 100644 --- a/src/common/conditional_compilation/CMakeLists.txt +++ b/src/common/conditional_compilation/CMakeLists.txt @@ -2,11 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 # -set(TARGET_NAME conditional_compilation) +set(TARGET_NAME openvino_conditional_compilation) add_library(${TARGET_NAME} INTERFACE) add_library(openvino::conditional_compilation ALIAS ${TARGET_NAME}) +set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME conditional_compilation) target_link_libraries(${TARGET_NAME} INTERFACE openvino::itt) @@ -23,9 +24,7 @@ elseif(SELECTIVE_BUILD STREQUAL "ON") find_host_package (Python3 REQUIRED COMPONENTS Interpreter) file(TO_CMAKE_PATH ${SELECTIVE_BUILD_STAT} CMAKE_SELECTIVE_BUILD_STAT) - file(GLOB STAT_FILES ${CMAKE_SELECTIVE_BUILD_STAT}) - if(NOT STAT_FILES) message(FATAL_ERROR "SELECTIVE_BUILD_STAT (${SELECTIVE_BUILD_STAT}) path doesn't contain valid csv files!") endif() @@ -56,12 +55,16 @@ elseif(SELECTIVE_BUILD STREQUAL "ON") ov_force_include(${TARGET_NAME} INTERFACE ${GENERATED_HEADER}) endif() -ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) - file(GLOB_RECURSE hdrs ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_SOURCES ${hdrs}) -openvino_developer_export_targets(COMPONENT openvino_common TARGETS openvino::conditional_compilation) if(ENABLE_TESTS) add_subdirectory(tests) endif() + +# install & export + +ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) + +ov_developer_package_export_targets(TARGET openvino::conditional_compilation + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") diff --git a/src/common/itt/CMakeLists.txt b/src/common/itt/CMakeLists.txt index 512574fd89bc06..4541fa112755a7 100644 --- a/src/common/itt/CMakeLists.txt +++ b/src/common/itt/CMakeLists.txt @@ -36,7 +36,11 @@ endif() target_include_directories(${TARGET_NAME} PUBLIC $) +ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) + +# install & export + ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) -ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) -openvino_developer_export_targets(COMPONENT openvino_common TARGETS openvino::itt) +ov_developer_package_export_targets(TARGET openvino::itt + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") diff --git a/src/common/low_precision_transformations/include/itt.hpp b/src/common/low_precision_transformations/include/itt.hpp index 37e01b9cfc4162..f9388c2facc557 100644 --- a/src/common/low_precision_transformations/include/itt.hpp +++ b/src/common/low_precision_transformations/include/itt.hpp @@ -23,12 +23,12 @@ namespace domains { } // namespace itt } // namespace low_precision } // namespace pass -} // namespace ngraph +} // namespace ov /* * RUN_ON_FUNCTION_SCOPE macro allows to disable the run_on_function pass * MATCHER_SCOPE macro allows to disable the MatcherPass if matcher isn't applied - * INTERNAL_OP_SCOPE macro allows to disable parts of internal nGraph operations if they are not used + * INTERNAL_OP_SCOPE macro allows to disable parts of internal openvino operations if they are not used */ #if defined(SELECTIVE_BUILD_ANALYZER) diff --git a/src/common/low_precision_transformations/include/low_precision/network_helper.hpp b/src/common/low_precision_transformations/include/low_precision/network_helper.hpp index d3c5a04d14df6d..83e486af697ff7 100644 --- a/src/common/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/src/common/low_precision_transformations/include/low_precision/network_helper.hpp @@ -29,7 +29,7 @@ namespace pass { namespace low_precision { /** -* @brief NetworkHelper class encapsulates manipulations with nGraph function. +* @brief NetworkHelper class encapsulates manipulations with ov::Model. */ class LP_TRANSFORMATIONS_API NetworkHelper { public: diff --git a/src/common/low_precision_transformations/src/batch_to_space.cpp b/src/common/low_precision_transformations/src/batch_to_space.cpp index b136d284ed5902..cc80f95707eb70 100644 --- a/src/common/low_precision_transformations/src/batch_to_space.cpp +++ b/src/common/low_precision_transformations/src/batch_to_space.cpp @@ -5,10 +5,9 @@ #include "low_precision/batch_to_space.hpp" #include -#include -#include -#include +#include "openvino/op/batch_to_space.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "low_precision/network_helper.hpp" #include "itt.hpp" @@ -20,7 +19,7 @@ BatchToSpaceTransformation::BatchToSpaceTransformation(const Params& params) : L MATCHER_SCOPE(BatchToSpaceTransformation); auto matcher = pattern::wrap_type(); - ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) { auto op = m.get_match_root(); if (transformation_callback(op)) { return false; @@ -28,7 +27,7 @@ BatchToSpaceTransformation::BatchToSpaceTransformation(const Params& params) : L return transform(*context, m); }; - auto m = std::make_shared(matcher, matcher_name); + auto m = std::make_shared(matcher, matcher_name); this->register_matcher(m, callback); } @@ -45,7 +44,7 @@ bool BatchToSpaceTransformation::canBeTransformed(const TransformationContext& c return dequantization.isPerTensor(); } -bool BatchToSpaceTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { +bool BatchToSpaceTransformation::transform(TransformationContext& context, ov::pass::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/src/common/low_precision_transformations/src/network_helper.cpp b/src/common/low_precision_transformations/src/network_helper.cpp index 29d7c9670bb362..b3925c850c5673 100644 --- a/src/common/low_precision_transformations/src/network_helper.cpp +++ b/src/common/low_precision_transformations/src/network_helper.cpp @@ -195,7 +195,7 @@ size_t NetworkHelper::getGroupsCount(std::shared_ptr layer) { } void NetworkHelper::removeLayer(std::shared_ptr layer) { - ngraph::replace_output_update_name(layer->output(0), layer->input_value(0)); + ov::replace_output_update_name(layer->output(0), layer->input_value(0)); } std::shared_ptr NetworkHelper::swapMultiplyAndAdd(std::shared_ptr addAfterMultiply, const int multiplyBranch) { diff --git a/src/common/low_precision_transformations/src/reshape.cpp b/src/common/low_precision_transformations/src/reshape.cpp index 487139077f5c69..0c5f83502df4e8 100644 --- a/src/common/low_precision_transformations/src/reshape.cpp +++ b/src/common/low_precision_transformations/src/reshape.cpp @@ -200,7 +200,7 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex const auto inputs = op->get_output_target_inputs(0); if (inputs.size() == 1ul) { const auto consumer = inputs.begin()->get_node(); - ignorePerTensorQuantizationCheck = ngraph::as_type(consumer) != nullptr; + ignorePerTensorQuantizationCheck = ov::as_type(consumer) != nullptr; } } diff --git a/src/common/low_precision_transformations/src/space_to_batch.cpp b/src/common/low_precision_transformations/src/space_to_batch.cpp index 0c9200a2f061eb..75bf0f9dbbc559 100644 --- a/src/common/low_precision_transformations/src/space_to_batch.cpp +++ b/src/common/low_precision_transformations/src/space_to_batch.cpp @@ -5,11 +5,9 @@ #include "low_precision/space_to_batch.hpp" #include -#include -#include - -#include +#include "openvino/op/space_to_batch.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "low_precision/network_helper.hpp" #include "itt.hpp" @@ -21,7 +19,7 @@ SpaceToBatchTransformation::SpaceToBatchTransformation(const Params& params) : L MATCHER_SCOPE(SpaceToBatchTransformation); auto matcher = pattern::wrap_type(); - ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) { auto op = m.get_match_root(); if (transformation_callback(op)) { return false; @@ -29,7 +27,7 @@ SpaceToBatchTransformation::SpaceToBatchTransformation(const Params& params) : L return transform(*context, m); }; - auto m = std::make_shared(matcher, matcher_name); + auto m = std::make_shared(matcher, matcher_name); this->register_matcher(m, callback); } @@ -46,7 +44,7 @@ bool SpaceToBatchTransformation::canBeTransformed(const TransformationContext& c return dequantization.isPerTensor(); } -bool SpaceToBatchTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { +bool SpaceToBatchTransformation::transform(TransformationContext& context, ov::pass::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/src/common/low_precision_transformations/src/transpose.cpp b/src/common/low_precision_transformations/src/transpose.cpp index e2c098103a5677..eb40b4e183abaa 100644 --- a/src/common/low_precision_transformations/src/transpose.cpp +++ b/src/common/low_precision_transformations/src/transpose.cpp @@ -116,7 +116,7 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont } } if (dequantization.multiply != nullptr) { - const auto mulConst = ov::as_type_ptr(dequantization.multiplyConstant); + const auto mulConst = ov::as_type_ptr(dequantization.multiplyConstant); if (!NetworkHelper::isScalarLike(mulConst)) { return false; } diff --git a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp index d3dd47d2107737..dd6995efc6e957 100644 --- a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -163,7 +163,7 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext // // [1] no other consumers for FQ sitting on weights (neither Result node, nor any others - // original code includes separate checks for node being output and other consumers present; for - // ngraph it is a single check for number of consumers). + // openvino it is a single check for number of consumers). // // [2] if weights is anything except a constant with data_type other than i8; this check is overriden by // stronger check from Convolution patter which expects FQ only on weights diff --git a/src/common/low_precision_transformations/tests/get_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/get_dequantization_transformation.cpp index 0d5bad38902cb4..9c378f33b69406 100644 --- a/src/common/low_precision_transformations/tests/get_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/get_dequantization_transformation.cpp @@ -26,7 +26,7 @@ using namespace ngraph::builder::subgraph; class GetDequantizationTestValues { public: FakeQuantizeOnData fakeQuantize; - // actual dequantization to create nGraph function to run NetworkHelper::getDequantization + // actual dequantization to create ov::Model to run NetworkHelper::getDequantization DequantizationOperations actualDequantization; DequantizationOperations expectedDequantization; }; diff --git a/src/common/offline_transformations/CMakeLists.txt b/src/common/offline_transformations/CMakeLists.txt index 6712f2f28586e3..69335b19be4e7a 100644 --- a/src/common/offline_transformations/CMakeLists.txt +++ b/src/common/offline_transformations/CMakeLists.txt @@ -2,13 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 # -set(TARGET_NAME "offline_transformations") - -file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB_RECURSE PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) +set(TARGET_NAME "openvino_offline_transformations") set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.hpp) + # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj @@ -19,15 +19,20 @@ source_group("include" FILES ${PUBLIC_HEADERS}) add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS}) +add_library(openvino::offline_transformations ALIAS ${TARGET_NAME}) +set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME offline_transformations) + target_link_libraries(${TARGET_NAME} PRIVATE openvino::core::dev openvino::reference openvino::runtime) -target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR} - PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") +target_include_directories(${TARGET_NAME} PUBLIC $ + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" + $) add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) -# developer package +# install & export -openvino_developer_export_targets(COMPONENT core TARGETS ${TARGET_NAME}) +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") diff --git a/src/common/preprocessing/src/CMakeLists.txt b/src/common/preprocessing/src/CMakeLists.txt index ccab33a652c633..9e3fd2d3789a02 100644 --- a/src/common/preprocessing/src/CMakeLists.txt +++ b/src/common/preprocessing/src/CMakeLists.txt @@ -211,7 +211,7 @@ endif() # developer package -openvino_developer_export_targets(COMPONENT core TARGETS ${TARGET_NAME}) +ov_developer_package_export_targets(TARGET ${TARGET_NAME}) # install diff --git a/src/common/snippets/CMakeLists.txt b/src/common/snippets/CMakeLists.txt index fdc1c83889423d..b3d2db77b77241 100644 --- a/src/common/snippets/CMakeLists.txt +++ b/src/common/snippets/CMakeLists.txt @@ -46,8 +46,9 @@ endif() set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) -# install +# install & export ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) -openvino_developer_export_targets(COMPONENT ${OV_CPACK_COMP_CORE} TARGETS ${TARGET_NAME}) +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${PUBLIC_HEADERS_DIR}/") diff --git a/src/common/snippets/docs/snippets_design_guide.md b/src/common/snippets/docs/snippets_design_guide.md index 83adce2171cccc..2f157b620b9574 100644 --- a/src/common/snippets/docs/snippets_design_guide.md +++ b/src/common/snippets/docs/snippets_design_guide.md @@ -236,7 +236,7 @@ Finally, the `Backend` uses the optimized `IR` to produce executable code. As shown on the figure below, `Snippets` are organized in a very similar way. ```mermaid graph LR - Source[nGraph \n model] + Source[OpenVINO \n model] subgraph Snippets direction LR subgraph Optimizer[Optimizer] @@ -244,10 +244,10 @@ As shown on the figure below, `Snippets` are organized in a very similar way. Data[Data flow \n optimizations] Converter[Convert \n IR] Control[Control flow \n optimizations] - Data-->|nGraph \nIR|Converter + Data-->|OpenVINO \nIR|Converter Converter-->|Linear \nIR|Control end - Frontend[Tokenizer]-->|nGraph \nIR|Data + Frontend[Tokenizer]-->|OpenVINO \nIR|Data Control-->|Linear \nIR|Backend[Generator] end Source --> Frontend @@ -258,13 +258,13 @@ classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626 class Frontend,Optimizer,Backend steel1 class Source,Executable daisy1 ``` -Instead of a source code, `Snippets` take `nGraph` model as an input. -Then the `Tokenizer` (which is essentially a `Snippets` `Frontend`) parses an input `nGraph model`, and tries to find a part of the model that could be processed by `Snippets`. -If such a part is found, `Tokenizer` converts it to an `nGraph IR` and stores inside a `Subgraph` node. -`nGraph IR` - is one of the two `IR` types used by `Snippets`, it is simply a small `nGraph model` that can contain `Snippets`-specific operations. +Instead of a source code, `Snippets` take `OpenVINO` model as an input. +Then the `Tokenizer` (which is essentially a `Snippets` `Frontend`) parses an input `OpenVINO model`, and tries to find a part of the model that could be processed by `Snippets`. +If such a part is found, `Tokenizer` converts it to an `OpenVINO IR` and stores inside a `Subgraph` node. +`OpenVINO IR` - is one of the two `IR` types used by `Snippets`, it is simply a small `OpenVINO model` that can contain `Snippets`-specific operations. -`nGraph IR` is then passed to the `Optimizer` unit that in turn consists of three subunits. -The purpose of the first subunit is to perform data flow optimizations. The second subunit converts `nGraph IR` (data-flow-oriented representation) to `Linear IR` (control-flow-focused IR). Finally, the third subunit is dedicated to control flow optimizations. +`OpenVINO IR` is then passed to the `Optimizer` unit that in turn consists of three subunits. +The purpose of the first subunit is to perform data flow optimizations. The second subunit converts `OpenVINO IR` (data-flow-oriented representation) to `Linear IR` (control-flow-focused IR). Finally, the third subunit is dedicated to control flow optimizations. After all optimizations, the `Linear IR` is used by the `Generator` (which is `Snippets` `Backend`) to produce executable code, which we will refer to as `Kernel`. As discussed in the Introduction, the purpose of the `Kernel` is to process a part of the initial tensor, and several `Kernels` are usually executed in parallel to process the whole tensor. @@ -280,7 +280,7 @@ The `Snippets` integration into the plugin pipeline is schematically depicted be graph LR subgraph Plugin[ Plugin pipeline ] direction LR - subgraph ngraph[ Transformations on nGraph model ] + subgraph openvino[ Transformations on OpenVINO model ] direction LR common[Common \n Transformations] lpt[Low \n Precision] @@ -305,7 +305,7 @@ The `Snippets` integration into the plugin pipeline is schematically depicted be create-->execute end end - Source[nGraph \n model]-->|Main \n flow|common + Source[OpenVINO \n model]-->|Main \n flow|common convert~~~internal classDef no-bg-color fill:none,stroke-width:0px classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626 @@ -315,15 +315,15 @@ class tokenize,optimize,generate steel1 class Source,Executable daisy1 class create,execute dafault_node1 ``` -As one can see from the picture, overall plugin pipeline consists of two major blocks: the first block applies transformations to `nGraph model` while the second one works with the internal plugin graph representation. Since `Snippets` is a backend-independent framework, it can't work with the plugin graph or plugin-specific `Ops` directly, so the tokenization is performed immediately before plugin-specific operations are introduced into the graph (`Conversion to Plugin opset`). -`Tokenizer` replaces parts of the `nGraph model` that can be executed by `Snippets` with `ov::op::Subgraph` nGraph nodes. -Each of the nodes stores a piece of the initial `nGraph model` that was replaced by the node. -This piece is stored as an nGraph model itself, which we refer to as `nGraph IR` to distinguish from the original `nGraph model`. +As one can see from the picture, overall plugin pipeline consists of two major blocks: the first block applies transformations to `OpenVINO model` while the second one works with the internal plugin graph representation. Since `Snippets` is a backend-independent framework, it can't work with the plugin graph or plugin-specific `Ops` directly, so the tokenization is performed immediately before plugin-specific operations are introduced into the graph (`Conversion to Plugin opset`). +`Tokenizer` replaces parts of the `OpenVINO model` that can be executed by `Snippets` with `ov::op::Subgraph` OpenVINO nodes. +Each of the nodes stores a piece of the initial `OpenVINO model` that was replaced by the node. +This piece is stored as an OpenVINO model itself, which we refer to as `OpenVINO IR` to distinguish from the original `OpenVINO model`. Note that sometimes the exact type of `IR` is not important in our discussion. -In such cases, we will refer to the `IR` (`nGraph` or `Linear`) as `body function`, or simply `body`. +In such cases, we will refer to the `IR` (`OpenVINO` or `Linear`) as `body function`, or simply `body`. -When the plugin finalizes all `nGraph model` transformations, the model is converted to an internal plugin graph representation. -At this point `ov::op::Subgraph` is converted to `ov::intel_cpu::node::Snippet` which still retains the `nGraph IR`. +When the plugin finalizes all `OpenVINO model` transformations, the model is converted to an internal plugin graph representation. +At this point `ov::op::Subgraph` is converted to `ov::intel_cpu::node::Snippet` which still retains the `OpenVINO IR`. This IR is then optimized and an executable `Kernel` is produced during the `CreateComputePrimitive` stage (`CreatePrimitive()` stage in CPU plugin). Finally, multiple copies of the produced kernel executed in parallel during the `Execute` stage. @@ -332,7 +332,7 @@ To summarize, `Snippets` workflow consists of three major blocks: `Tokenizer`, ` ### Tokenizer -`Tokenizer` is run on an `nGraph model` and its main purpose is to identify subgraphs that are suitable for code generation. +`Tokenizer` is run on an `OpenVINO model` and its main purpose is to identify subgraphs that are suitable for code generation. These subgraphs are then replaced with the `ov::op::Subgraph` node. This stage is called tokenization because the `Tokenizer` employs a greedy algorithm similar to the ones used for parsing input stream of characters into tokens. One of the distinctive features of this algorithm is its flexibility, so it can seamlessly handle arbitrary operations' patterns. @@ -371,8 +371,8 @@ The tokenization algorithm is depicted on the flowchart below. ``` Let us briefly describe the process: 1. If a Node is not supported by `Snippets`, then ignore it and proceed to the next one. -2. If a Node has no `Subgraph` parents, then replace it with `Subgraph` node and copy the initial Node to the `Subgraph's` body (which is in the `nGraph IR` form). -3. If a Node has a single `Subgraph` parent, then attach it to the `Subgraph`. It means copy the Node to the `Subgraph's` body, and remove it from the original `nGraph model`. Note that if the Node has more than one parent, corresponding parents' outputs will be connected with the updated `Subgraph` as shown on the diagram below. +2. If a Node has no `Subgraph` parents, then replace it with `Subgraph` node and copy the initial Node to the `Subgraph's` body (which is in the `OpenVINO IR` form). +3. If a Node has a single `Subgraph` parent, then attach it to the `Subgraph`. It means copy the Node to the `Subgraph's` body, and remove it from the original `OpenVINO model`. Note that if the Node has more than one parent, corresponding parents' outputs will be connected with the updated `Subgraph` as shown on the diagram below. 4. If a Node has multiple `Subgraph` parents, then they will be merged into a single `Subgraph` and the Node will be attached to it. ```mermaid graph LR @@ -409,7 +409,7 @@ If a `Constant` is not scalar, then it can't be tokenized since storing `Constan Please refer to the [collapse_subgraph.cpp](../src/pass/collapse_subgraph.cpp) to gain more insights on the tokenization process. There is however one more aspect of the tokenization process that is worth covering here. -As discussed in the **Plugin integration** section above, the `Tokenizer` is executed before the plugin converts the `nGraph model` to an internal graph representation. +As discussed in the **Plugin integration** section above, the `Tokenizer` is executed before the plugin converts the `OpenVINO model` to an internal graph representation. It means that the tokenized nodes will not be visible to the plugin (since they are hidden inside `Subrgaphs'` body functions), so they will be ignored by plugin optimization passes. In particular, the plugin won't be able to fuse the nodes using the OneDNN post-ops mechanism. This type of fusings is backend-specific, therefore can't be supported by `Snippets` directly, but it's still important from the performance perspective. @@ -424,15 +424,15 @@ Please, refer to the [snippets_mark_skipped.cpp](../../../plugins/intel_cpu/src/ As briefly discussed in the ***Architecture*** section, `Optimizer` consists of two major units: the first one performs data flow optimization, and the second one is focused on control flow. Note however that some data-flow-related passes can be performed only after the control flow optimizations, so the second unit modifies the dataflow as well. Nevertheless, we will refer to the units as `Data flow optimizer` and `Control flow optimizer` to reflect their main purpose. -Keep in mind that, as discussed above, the `Data flow optimizer` operates exclusively on the `nGraph IR`, while the `Control flow optimizer` works with the `Linear IR`. +Keep in mind that, as discussed above, the `Data flow optimizer` operates exclusively on the `OpenVINO IR`, while the `Control flow optimizer` works with the `Linear IR`. We will discuss these units in more detail below. #### Data flow optimizer Before `Data flow optimizer` can modify data flow, it needs to perform a preliminary stage called `Canonicalization`. To understand the stage's purpose we need to make a step back to the tokenization. - The `Tokenizer` saves a part of the initial `nGraph function` in `Subgraph's` body. - The problem is that the `nGraph function` has no information about data layouts that will be used by the `Subgraph's` parents during the `Execution` stage. + The `Tokenizer` saves a part of the initial `OpenVINO function` in `Subgraph's` body. + The problem is that the `OpenVINO function` has no information about data layouts that will be used by the `Subgraph's` parents during the `Execution` stage. This happens because the plugin assigns layouts on internal graph representation well after the tokenization is finished. The purpose of `Canonicalization` is to incorporate the plugin-defined input layouts into the body function. If an input's layout was changed to a blocked one, then the corresponding body input `Parameter` will be reshaped, and new shapes will be propagated through the body function. @@ -485,17 +485,17 @@ The managers will be executed on different stages of the pipeline to enable more #### Control flow optimizer As follows from its name, the main objective of `Control flow optimizer` is to manage and optimize control flow of the kernel. -Since the `nGraph IR` doesn't have an explicit control flow representation, a special control-flow-oriented `IR` was developed. +Since the `OpenVINO IR` doesn't have an explicit control flow representation, a special control-flow-oriented `IR` was developed. It is called `Linear IR` (or simply `LIR`), let's discuss it first, before we consider the transformation pipeline. ##### Linear Intermediate Representation `Linear IR` is specially designed to facilitate manipulations with control flow. -It is called linear, because it is essentially a sequence of `Expressions` (an analog of nGraph `Op`) that represents control flow. +It is called linear, because it is essentially a sequence of `Expressions` (an analog of OpenVINO `Op`) that represents control flow. So if `Expression 1` is followed by `Expression 2` in `LIR` then the code for `Expression 1` will be emitted before the code for `Expression 2`. Note that this doesn't necessarily mean that the `Expression 2` uses the result of `Expression 1`, they can be completely unrelated from the data flow standpoint. The only restriction here is that all the `Expression's` inputs must be ready by the time it is executed. -This restriction is the same as in `nGraph IR`, but an important distinction here is that `LIR` allows to permute `Expressions` while this data-dependency condition is fulfilled. +This restriction is the same as in `OpenVINO IR`, but an important distinction here is that `LIR` allows to permute `Expressions` while this data-dependency condition is fulfilled. So the `LIR` preserves data dependencies, but also allows for a more control on expressions' order that represents control flow. This is a brief rationale behind the linear `IR`, now let's move to the implementation. @@ -536,13 +536,13 @@ flowchart LR class consumers no-bg ``` -`LinearIR` is our graph representation, it's an analog to an nGraph model. +`LinearIR` is our graph representation, it's an analog to an OpenVINO model. It is simply a container for `Expressions`, the order of `Expressions` represents control flow. -`LIR` also incorporates a range of useful methods to manage the `Expressions`, for example `create_expression(...)` to build `Expressions` from nGraph nodes, or `replace_input(...)` to modify data dependencies between `Expressions`. +`LIR` also incorporates a range of useful methods to manage the `Expressions`, for example `create_expression(...)` to build `Expressions` from OpenVINO nodes, or `replace_input(...)` to modify data dependencies between `Expressions`. Please refer to the implementation in [linear_ir.cpp](../src/lowered/linear_ir.cpp) for more details. `Expression` is the main building block of a `Linear IR`. -It contains a pointer to the nGraph node it was created from and a pointer to the emitter it will be mapped to (which is null until `Expression::init_emitter(...)` is called). +It contains a pointer to the OpenVINO node it was created from and a pointer to the emitter it will be mapped to (which is null until `Expression::init_emitter(...)` is called). An `Expression` can have an arbitrary number of inputs and outputs, we will refer to them simply as ports. Every port can be uniquely identified by the `ExpressionPort` class. The `ExpressionPort` contains a pointer to the `Expression` which port it represents, the port type (`input` or `output`) and its index (input/output number). @@ -556,7 +556,7 @@ This information will be used by the control flow optimization pipeline to deter An `Expression` internally stores two separate vectors of input and output `PortDescriptors` which could be accessed by calling `get_input_port_descriptors()` or `get_input_port_descriptor(i)` (and similar for outputs). Finally, `PortConnectors` specify how the `Expression's` ports are connected. -Note that an `Expression` output can be connected to several inputs (like with nGraph nodes), So every `PortConnector` stores one source `ExpressionPort` and a set of consumer `ExpressionPorts` that can be accessed by the `get_source()` or `get_consumers()` methods, respectively. +Note that an `Expression` output can be connected to several inputs (like with OpenVINO nodes), So every `PortConnector` stores one source `ExpressionPort` and a set of consumer `ExpressionPorts` that can be accessed by the `get_source()` or `get_consumers()` methods, respectively. Like with `PortDescriptors`, an `Expression` stores input and output `PortConnectors` in two separate vectors accessed via `get_input_port_connector(i)` (or its output twin). An example on how `PortConnectors` can be used to move between `Expressions` is given on the right side of the above picture. @@ -622,7 +622,7 @@ Please see [assign_registers.cpp](../src/lowered/pass/assign_registers.cpp) and When the `Preparation` is finished, the `Generator` constructs target-specific emitters by calling `init_emitter(target)` method for every `Expression` in the `LinearIR`, where the `target` is a `TargetMachine` instance. The `TargetMachine` is a class that provides generator with target-specific information, such as supported instruction sets, vector register size etc. -`TargetMachine` also maps the nGraph's `DiscreteTypeInfo` (stored in the `Expression`) to the emitter that actually implements the operation. +`TargetMachine` also maps the OpenVINO's `DiscreteTypeInfo` (stored in the `Expression`) to the emitter that actually implements the operation. The mapping is done using the `jitters` map defined in [target_machine.hpp](../include/snippets/target_machine.hpp). In order for this mechanism to work, every `Snippets'` code generation backend should create emitter implementations derived from the `Emitter` base class defined in [emitter.hpp](../include/snippets/emitter.hpp). The backend then should create its own target machine class (derived from the common `TargetMachine`) and populate the `jitters` map, see the [cpu_generator.cpp](../../../plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp) for an implementation example. diff --git a/src/common/snippets/include/snippets/itt.hpp b/src/common/snippets/include/snippets/itt.hpp index 0c594165ab5776..4a617f5a06e645 100644 --- a/src/common/snippets/include/snippets/itt.hpp +++ b/src/common/snippets/include/snippets/itt.hpp @@ -9,7 +9,7 @@ #pragma once -#include +#include namespace ov { namespace pass { @@ -26,7 +26,7 @@ OV_CC_DOMAINS(internal_op); /* * RUN_ON_FUNCTION_SCOPE macro allows to disable the run_on_function pass * MATCHER_SCOPE macro allows to disable the MatcherPass if matcher isn't applied - * INTERNAL_OP_SCOPE macro allows to disable parts of internal nGraph operations if they are not used + * INTERNAL_OP_SCOPE macro allows to disable parts of internal openvino operations if they are not used */ #if defined(SELECTIVE_BUILD_ANALYZER) diff --git a/src/common/snippets/include/snippets/op/loop.hpp b/src/common/snippets/include/snippets/op/loop.hpp index fefc1368bb4307..1fd51649fc65d1 100644 --- a/src/common/snippets/include/snippets/op/loop.hpp +++ b/src/common/snippets/include/snippets/op/loop.hpp @@ -6,7 +6,7 @@ #include "openvino/op/op.hpp" #include "snippets/emitter.hpp" -#include "ngraph/op/parameter.hpp" +#include "openvino/op/parameter.hpp" namespace ov { namespace snippets { diff --git a/src/common/snippets/include/snippets/op/powerstatic.hpp b/src/common/snippets/include/snippets/op/powerstatic.hpp index 5a1d0abb23ffb4..d76fa48e0601aa 100644 --- a/src/common/snippets/include/snippets/op/powerstatic.hpp +++ b/src/common/snippets/include/snippets/op/powerstatic.hpp @@ -5,7 +5,6 @@ #pragma once #include "openvino/op/op.hpp" -#include #include namespace ov { diff --git a/src/common/snippets/include/snippets/op/scalar.hpp b/src/common/snippets/include/snippets/op/scalar.hpp index 43ecb1aad671cc..2720ffdc062091 100644 --- a/src/common/snippets/include/snippets/op/scalar.hpp +++ b/src/common/snippets/include/snippets/op/scalar.hpp @@ -5,7 +5,7 @@ #pragma once #include "openvino/op/op.hpp" -#include "ngraph/op/constant.hpp" +#include "openvino/op/constant.hpp" namespace ov { namespace snippets { diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index dab2de53e56d47..a9321e957e273c 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -190,10 +190,10 @@ class Subgraph : public ov::op::util::SubGraphOp { std::shared_ptr m_shape_infer = nullptr; - class NgraphShapeInfer : public ShapeInferSnippetsNode { - std::shared_ptr m_ngraph_body; + class OVShapeInfer : public ShapeInferSnippetsNode { + std::shared_ptr m_ov_body; public: - explicit NgraphShapeInfer(const std::shared_ptr& body); + explicit OVShapeInfer(const std::shared_ptr& body); Result infer(const std::vector& input_shapes) override; }; }; diff --git a/src/common/snippets/include/snippets/pass/propagate_precision.hpp b/src/common/snippets/include/snippets/pass/propagate_precision.hpp index 1f5bd0cf9542bf..6f805cb1b68808 100644 --- a/src/common/snippets/include/snippets/pass/propagate_precision.hpp +++ b/src/common/snippets/include/snippets/pass/propagate_precision.hpp @@ -5,7 +5,7 @@ #pragma once #include -#include +#include "openvino/pass/pass.hpp" #include "snippets/generator.hpp" namespace ov { diff --git a/src/common/snippets/include/snippets/shape_inference/shape_inference.hpp b/src/common/snippets/include/snippets/shape_inference/shape_inference.hpp index af7d29f8e3f3c3..9066d571cbb4e6 100644 --- a/src/common/snippets/include/snippets/shape_inference/shape_inference.hpp +++ b/src/common/snippets/include/snippets/shape_inference/shape_inference.hpp @@ -38,7 +38,7 @@ class IShapeInferSnippets { }; /** - * Shape inference class for Subgraph node (both nGraph and Linear IRs). + * Shape inference class for Subgraph node (both openvino and Linear IRs). * It stores the result of the last shape inference, so it can be reused in optimization pipeline. * */ diff --git a/src/common/snippets/src/lowered/expression_factory.cpp b/src/common/snippets/src/lowered/expression_factory.cpp index 34651fd6dbbbd2..cd5cfe0db74c53 100644 --- a/src/common/snippets/src/lowered/expression_factory.cpp +++ b/src/common/snippets/src/lowered/expression_factory.cpp @@ -69,7 +69,7 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr(new IOExpression(res, model->get_result_index(res), linear_ir.m_shape_infer_factory)); create_expression_inputs(linear_ir, expr); - // The Result node don't need output port (because of sense of the node). But each node in ngraph must have one output at least. + // The Result node don't need output port (because of sense of the node). But each node in openvino must have one output at least. // The port descriptors are automatically created in constructor. We manually clean output ports. expr->m_output_port_descriptors.clear(); expr->validate(); @@ -110,7 +110,7 @@ ExpressionPtr LinearIR::ExpressionFactory::create(const std::shared_ptr(last_input.get_expr()->get_node()), "LoopEnd expression expects LoopBegin on last input"); expr->m_input_port_descriptors[inputs.size() - 1] = last_input.get_descriptor_ptr()->clone(); init_expression_inputs(expr, inputs); - // The LoopEnd node don't need output port (because of sense of the node). But each node in ngraph must have one output at least. + // The LoopEnd node don't need output port (because of sense of the node). But each node in openvino must have one output at least. // The port descriptors are automatically created in constructor. We manually clean output ports. expr->m_output_port_descriptors.clear(); expr->validate(); diff --git a/src/common/snippets/src/lowered/pass/identify_buffers.cpp b/src/common/snippets/src/lowered/pass/identify_buffers.cpp index 02aabc93ead6ac..d411da67af38d6 100644 --- a/src/common/snippets/src/lowered/pass/identify_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/identify_buffers.cpp @@ -36,7 +36,7 @@ std::vector IdentifyBuffers::create_adjacency_matrix(const LinearIR& linea auto get_buffer_idx = [&](const std::shared_ptr& buffer) { const auto iter = std::find(buffers.cbegin(), buffers.cend(), buffer); - NGRAPH_CHECK(iter != buffers.cend(), "Buffer wasn't find in Buffer system of Subgraph"); + OPENVINO_ASSERT(iter != buffers.cend(), "Buffer wasn't find in Buffer system of Subgraph"); return std::distance(buffers.cbegin(), iter); }; diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index b64a4328a83b1c..5cce5d85c13a82 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -127,7 +127,7 @@ ov::PartialShape Brgemm::get_planar_output_shape(const ov::PartialShape& output_ } ov::PartialShape Brgemm::get_output_partial_shape(const std::vector& input_shapes) const { - NGRAPH_CHECK(input_shapes.size() == 2, "BRGEMM expects 2 input shapes for shape inference"); + OPENVINO_ASSERT(input_shapes.size() == 2, "BRGEMM expects 2 input shapes for shape inference"); // Note: All majors checks are missed because Brgemm is transformed from MatMul with whole shape infer support diff --git a/src/common/snippets/src/op/fill.cpp b/src/common/snippets/src/op/fill.cpp index 437f594cdfc519..05f79495ae1748 100644 --- a/src/common/snippets/src/op/fill.cpp +++ b/src/common/snippets/src/op/fill.cpp @@ -32,7 +32,7 @@ std::shared_ptr Fill::clone_with_new_inputs(const OutputVector& new_args) void Fill::validate_and_infer_types() { INTERNAL_OP_SCOPE(Fill_validate_and_infer_types); const auto in_type = get_input_element_type(0); - NGRAPH_CHECK(in_type.size() == 4, "Fill operation supports only element types with 4 byte size but got:" + std::to_string(in_type.size())); + OPENVINO_ASSERT(in_type.size() == 4, "Fill operation supports only element types with 4 byte size but got:" + std::to_string(in_type.size())); set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); } diff --git a/src/common/snippets/src/op/load.cpp b/src/common/snippets/src/op/load.cpp index d1a7d0f2cb523e..868ed4294e6dab 100644 --- a/src/common/snippets/src/op/load.cpp +++ b/src/common/snippets/src/op/load.cpp @@ -40,13 +40,13 @@ std::shared_ptr Load::clone_with_new_inputs(const OutputVector& new_args) LoadReshape::LoadReshape(const Output& x, const size_t count, const size_t offset, std::vector order) : Load(x, count, offset), m_order(std::move(order)) { const auto& in_shape = x.get_partial_shape(); - NGRAPH_CHECK(in_shape.is_static(), "LoadReshape supports only static input shapes"); + OPENVINO_ASSERT(in_shape.is_static(), "LoadReshape supports only static input shapes"); const auto in_shape_size = in_shape.size(); - NGRAPH_CHECK(m_order.size() == in_shape_size, "LoadReshape got new_order of invalid size"); - NGRAPH_CHECK(*std::max_element(m_order.begin(), m_order.end()) == in_shape_size - 1 && + OPENVINO_ASSERT(m_order.size() == in_shape_size, "LoadReshape got new_order of invalid size"); + OPENVINO_ASSERT(*std::max_element(m_order.begin(), m_order.end()) == in_shape_size - 1 && *std::min_element(m_order.begin(), m_order.end()) == 0, "LoadReshape detected invalid values in new_order"); const std::set unique_dims(order.begin(), order.end()); - NGRAPH_CHECK(unique_dims.size() == order.size(), "LoadReshape order must not contain repeated elements"); + OPENVINO_ASSERT(unique_dims.size() == order.size(), "LoadReshape order must not contain repeated elements"); constructor_validate_and_infer_types(); } diff --git a/src/common/snippets/src/op/memory_access.cpp b/src/common/snippets/src/op/memory_access.cpp index 117c1bd14e2e7f..f98d72be7f94f5 100644 --- a/src/common/snippets/src/op/memory_access.cpp +++ b/src/common/snippets/src/op/memory_access.cpp @@ -73,25 +73,25 @@ bool MemoryAccess::is_memory_access_output_port(size_t idx) const { void MemoryAccess::set_input_port_descriptor(const PortDescriptor& desc, const size_t i) { const auto it = m_input_ports.find(i); - NGRAPH_CHECK(it != m_input_ports.end(), "Index of input port descriptor should be less than count of input ports"); + OPENVINO_ASSERT(it != m_input_ports.end(), "Index of input port descriptor should be less than count of input ports"); (*it).second = { desc.count, desc.offset, i}; } void MemoryAccess::set_output_port_descriptor(const PortDescriptor& desc, const size_t i) { const auto it = m_output_ports.find(i); - NGRAPH_CHECK(it != m_output_ports.end(), "Index of output port descriptor should be less than count of output ports"); + OPENVINO_ASSERT(it != m_output_ports.end(), "Index of output port descriptor should be less than count of output ports"); (*it).second = { desc.count, desc.offset, i}; } const MemoryAccess::PortDescriptor& MemoryAccess::get_input_port_descriptor(const size_t i) const { const auto it = m_input_ports.find(i); - NGRAPH_CHECK(it != m_input_ports.end(), "Index of input port descriptor should be less than count of input ports"); + OPENVINO_ASSERT(it != m_input_ports.end(), "Index of input port descriptor should be less than count of input ports"); return (*it).second; } const MemoryAccess::PortDescriptor& MemoryAccess::get_output_port_descriptor(const size_t i) const { const auto it = m_output_ports.find(i); - NGRAPH_CHECK(it != m_output_ports.end(), "Index of output port descriptor should be less than count of output ports"); + OPENVINO_ASSERT(it != m_output_ports.end(), "Index of output port descriptor should be less than count of output ports"); return (*it).second; } diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 5de4dae47a95a4..dc13bb3e8bb716 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -160,7 +160,7 @@ Subgraph::Subgraph(const OutputVector& args, const std::shared_ptr& b for (size_t i = 0; i < body->get_output_size(); ++i) m_output_descriptions[0].push_back(std::make_shared(i, i)); m_transformations_allowed = false; - m_shape_infer = std::make_shared(body); + m_shape_infer = std::make_shared(body); } Subgraph::Subgraph(const NodeVector& args, const std::shared_ptr& body) @@ -292,7 +292,7 @@ auto Subgraph::wrap_node_as_subgraph(const std::shared_ptr& node) -> s } void Subgraph::fill_empty_output_names(const Output& target_output_node, const Output& replacement_output_node) { - NGRAPH_SUPPRESS_DEPRECATED_START + OPENVINO_SUPPRESS_DEPRECATED_START auto& out_tensor = target_output_node.get_tensor(); const std::string new_name = ov::op::util::get_ie_output_name(replacement_output_node); if (ov::descriptor::get_ov_tensor_legacy_name(out_tensor).empty()) { @@ -301,7 +301,7 @@ void Subgraph::fill_empty_output_names(const Output& target_output_node, c if (!replacement_output_node.get_names().empty()) { out_tensor.set_names(replacement_output_node.get_names()); } - NGRAPH_SUPPRESS_DEPRECATED_END + OPENVINO_SUPPRESS_DEPRECATED_END } auto Subgraph::constant_input_should_be_inside_body(const std::shared_ptr& node) -> bool { @@ -484,18 +484,18 @@ IShapeInferSnippets::Result Subgraph::shape_infer(const std::vectorinfer(input_shapes); } -Subgraph::NgraphShapeInfer::NgraphShapeInfer(const std::shared_ptr& body) : - m_ngraph_body(body) { - OPENVINO_ASSERT(m_ngraph_body, "Can't initialize shape infer with empty body"); +Subgraph::OVShapeInfer::OVShapeInfer(const std::shared_ptr& body) : + m_ov_body(body) { + OPENVINO_ASSERT(m_ov_body, "Can't initialize shape infer with empty body"); } -IShapeInferSnippets::Result Subgraph::NgraphShapeInfer::infer(const std::vector& input_shapes) { - const ParameterVector& parameters = m_ngraph_body->get_parameters(); - const ResultVector& results = m_ngraph_body->get_results(); +IShapeInferSnippets::Result Subgraph::OVShapeInfer::infer(const std::vector& input_shapes) { + const ParameterVector& parameters = m_ov_body->get_parameters(); + const ResultVector& results = m_ov_body->get_results(); OPENVINO_ASSERT(parameters.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); for (size_t i = 0; i < parameters.size(); ++i) parameters[i]->set_partial_shape(utils::vdims_to_pshape(input_shapes[i].get())); - m_ngraph_body->validate_nodes_and_infer_types(); + m_ov_body->validate_nodes_and_infer_types(); std::vector outputDims; for (const auto& res : results) outputDims.emplace_back(utils::pshape_to_vdims(res->get_input_partial_shape(0))); @@ -702,7 +702,7 @@ snippets::Schedule Subgraph::generate(const std::vector diff --git a/src/common/snippets/src/pass/hash.cpp b/src/common/snippets/src/pass/hash.cpp index 48dd9586ae4337..2f975ef2cbccee 100644 --- a/src/common/snippets/src/pass/hash.cpp +++ b/src/common/snippets/src/pass/hash.cpp @@ -10,8 +10,6 @@ #include #include -#include "ngraph/ops.hpp" -#include "ngraph/opsets/opset.hpp" #include "openvino/core/except.hpp" #include "openvino/core/meta_data.hpp" #include "openvino/core/model.hpp" @@ -169,7 +167,7 @@ class SnippetsHasher : public ov::AttributeVisitor { m_node_type_name(node_type_name) {} void on_adapter(const std::string& name, ov::ValueAccessor& adapter) override { - if (const auto& a = ov::as_type>>(&adapter)) { + if (const auto& a = ov::as_type>>(&adapter)) { m_hash = hash_combine(hash_combine(m_hash, name), a->get()->get_info().variable_id); } else if (const auto& a = ov::as_type>>(&adapter)) { diff --git a/src/common/snippets/src/pass/propagate_precision.cpp b/src/common/snippets/src/pass/propagate_precision.cpp index 6ba1f5f3d09ad1..568db74d6a5c0a 100644 --- a/src/common/snippets/src/pass/propagate_precision.cpp +++ b/src/common/snippets/src/pass/propagate_precision.cpp @@ -32,7 +32,7 @@ bool ov::snippets::pass::PropagatePrecision::run_on_model(const std::shared_ptr< auto type_info = op->get_type_info(); std::set supported_precisions; // TODO: At the moment Softmax is decomposed on Linear IR level. - // When Softmax will be decomposed on NGraph level, remove it + // When Softmax will be decomposed on openvino level, remove it if (type_info.is_castable(ov::op::v1::Softmax::get_type_info_static())) { supported_precisions = {{ov::element::f32}}; } else { diff --git a/src/common/snippets/src/pass/softmax_reshape_elimination.cpp b/src/common/snippets/src/pass/softmax_reshape_elimination.cpp index 2f60f1e1155c76..36a0afb7c11325 100644 --- a/src/common/snippets/src/pass/softmax_reshape_elimination.cpp +++ b/src/common/snippets/src/pass/softmax_reshape_elimination.cpp @@ -10,7 +10,7 @@ #include "openvino/core/rt_info.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" -#include +#include "openvino/core/validation_util.hpp" ov::snippets::pass::SoftmaxReshapeElimination::SoftmaxReshapeElimination() { MATCHER_SCOPE(SoftmaxReshapeElimination); diff --git a/src/common/transformations/CMakeLists.txt b/src/common/transformations/CMakeLists.txt index 164daec54c2f18..e7d365ca32492e 100644 --- a/src/common/transformations/CMakeLists.txt +++ b/src/common/transformations/CMakeLists.txt @@ -4,11 +4,11 @@ set(TARGET_NAME "inference_engine_transformations") -file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB_RECURSE PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) - set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.hpp) + # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj @@ -27,8 +27,8 @@ ov_build_target_faster(${TARGET_NAME}_obj target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::reference openvino::itt openvino::builders openvino::core::dev openvino::shape_inference) -target_include_directories(${TARGET_NAME}_obj PRIVATE $ - "${CMAKE_CURRENT_SOURCE_DIR}/src") +target_include_directories(${TARGET_NAME}_obj PRIVATE "${PUBLIC_HEADERS_DIR}" + "${CMAKE_CURRENT_SOURCE_DIR}/src") ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}_obj) @@ -43,14 +43,14 @@ endif() add_library(${TARGET_NAME} INTERFACE) target_include_directories(${TARGET_NAME} INTERFACE - $ + $ $>) target_link_libraries(${TARGET_NAME} INTERFACE openvino::runtime) # even the Transformations library is supposed to be Plugin API # we still have some code compiled as transformations, but headers are -# part of ngraph core API +# part of openvino core API # so, we need to mark this library as important for ABI free ov_abi_free_target(${TARGET_NAME}_obj) diff --git a/src/common/transformations/tests/CMakeLists.txt b/src/common/transformations/tests/CMakeLists.txt index 84a4fb6e900edf..7091f5162fea8d 100644 --- a/src/common/transformations/tests/CMakeLists.txt +++ b/src/common/transformations/tests/CMakeLists.txt @@ -15,7 +15,7 @@ ov_add_test_target( LINK_LIBRARIES gmock func_test_utils - offline_transformations + openvino::offline_transformations sharedTestClasses ov_lpt_models ADD_CLANG_FORMAT diff --git a/src/common/transformations/tests/common_optimizations/fq_mul_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/fq_mul_fusion_test.cpp index 61f43c937be16e..7dcf6a9c44b3c3 100644 --- a/src/common/transformations/tests/common_optimizations/fq_mul_fusion_test.cpp +++ b/src/common/transformations/tests/common_optimizations/fq_mul_fusion_test.cpp @@ -13,7 +13,6 @@ #include "common_test_utils/ov_test_utils.hpp" #include "common_test_utils/test_common.hpp" #include "functional_test_utils/plugin_cache.hpp" -#include "ie_core.hpp" #include "openvino/core/model.hpp" #include "openvino/opsets/opset4.hpp" #include "openvino/pass/manager.hpp" diff --git a/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp b/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp index 8e92a5e3f7797f..8127ad129ef34b 100644 --- a/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/fq_reshape_fusion.cpp @@ -10,7 +10,6 @@ #include #include -#include "cnn_network_ngraph_impl.hpp" #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" #include "openvino/opsets/opset4.hpp" @@ -19,7 +18,6 @@ using namespace ov; using namespace testing; -using namespace InferenceEngine; namespace { @@ -32,8 +30,8 @@ struct FQReshapeFusionTestCase { bool is_negative; }; -class nGraphFQReshapeFusionTests : public ov::test::TestsCommon, - public testing::WithParamInterface> { +class FQReshapeFusionTests : public ov::test::TestsCommon, + public testing::WithParamInterface> { public: std::shared_ptr f, ref_f; @@ -115,7 +113,7 @@ class nGraphFQReshapeFusionTests : public ov::test::TestsCommon, } }; -TEST_P(nGraphFQReshapeFusionTests, ReshapeMatMul) { +TEST_P(FQReshapeFusionTests, ReshapeMatMul) { auto unh = std::make_shared(); pass::Manager manager; manager.register_pass(unh); @@ -134,7 +132,7 @@ TEST_P(nGraphFQReshapeFusionTests, ReshapeMatMul) { INSTANTIATE_TEST_SUITE_P( NGraph, - nGraphFQReshapeFusionTests, + FQReshapeFusionTests, testing::Values( // positive FQReshapeFusionTestCase{{1, 2, 1, 3}, diff --git a/src/common/transformations/tests/common_optimizations/mish_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/mish_fusion_test.cpp index 61d236a6355628..4fa1af8088d6b2 100644 --- a/src/common/transformations/tests/common_optimizations/mish_fusion_test.cpp +++ b/src/common/transformations/tests/common_optimizations/mish_fusion_test.cpp @@ -19,7 +19,7 @@ using namespace ov; using namespace testing; -// LPT to nGraph migration: temporary disabling unexpected not reproduced fails on CI: +// LPT to openvino migration: temporary disabling unexpected not reproduced fails on CI: // https://openvino-ci.intel.com/job/private-ci/job/ie/job/build-linux-ubuntu18_i386/478/ TEST_F(TransformationTestsF, MishFusing) { { diff --git a/src/common/transformations/tests/smart_reshape/sr_mimicking_sbs.cpp b/src/common/transformations/tests/smart_reshape/sr_mimicking_sbs.cpp index 5b11259cbaf998..40f954312b7f76 100644 --- a/src/common/transformations/tests/smart_reshape/sr_mimicking_sbs.cpp +++ b/src/common/transformations/tests/smart_reshape/sr_mimicking_sbs.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include "common_test_utils/ov_test_utils.hpp" @@ -20,15 +19,9 @@ TEST(SmartReshapeTests, MimickingSBS) { f = std::make_shared(NodeVector{reshape}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({12, 4})); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 2, 3, 4})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, MimickingSBS_1) { @@ -40,15 +33,9 @@ TEST(SmartReshapeTests, MimickingSBS_1) { f = std::make_shared(NodeVector{reshape}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({2, 24})); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 2, 3, 4})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, MimickingSBS_2) { @@ -60,13 +47,7 @@ TEST(SmartReshapeTests, MimickingSBS_2) { f = std::make_shared(NodeVector{reshape}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(1)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({6, 4})); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 2, 3, 4})); + EXPECT_ANY_THROW(set_batch(f, 1)); } diff --git a/src/common/transformations/tests/smart_reshape/sr_proposal_scales.cpp b/src/common/transformations/tests/smart_reshape/sr_proposal_scales.cpp index 5e8088a9f2371d..06408dc2807d36 100644 --- a/src/common/transformations/tests/smart_reshape/sr_proposal_scales.cpp +++ b/src/common/transformations/tests/smart_reshape/sr_proposal_scales.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include "common_test_utils/ov_test_utils.hpp" @@ -39,12 +38,9 @@ TEST(SmartReshapeTests, Proposal1Scales) { f = std::make_shared(NodeVector{proposal}, ParameterVector{input_0, input_1, input_2}); } - InferenceEngine::CNNNetwork network(f); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({600, 5})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, Proposal1Scales_WithConvert) { @@ -75,12 +71,9 @@ TEST(SmartReshapeTests, Proposal1Scales_WithConvert) { f = std::make_shared(NodeVector{proposal}, ParameterVector{input_0, input_1, input_2}); } - InferenceEngine::CNNNetwork network(f); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({600, 5})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, Proposal4Scales) { @@ -110,14 +103,9 @@ TEST(SmartReshapeTests, Proposal4Scales) { f = std::make_shared(NodeVector{proposal}, ParameterVector{input_0, input_1, input_2}); } - InferenceEngine::CNNNetwork network(f); - auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({600, 5})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, Proposal4Scales_WithConvert) { @@ -148,12 +136,7 @@ TEST(SmartReshapeTests, Proposal4Scales_WithConvert) { f = std::make_shared(NodeVector{proposal}, ParameterVector{input_0, input_1, input_2}); } - InferenceEngine::CNNNetwork network(f); - auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({600, 5})); + EXPECT_ANY_THROW(set_batch(f, 2)); } diff --git a/src/common/transformations/tests/smart_reshape/sr_reshape_1d.cpp b/src/common/transformations/tests/smart_reshape/sr_reshape_1d.cpp index bad3962e3fd080..d98cb32f258f4d 100644 --- a/src/common/transformations/tests/smart_reshape/sr_reshape_1d.cpp +++ b/src/common/transformations/tests/smart_reshape/sr_reshape_1d.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include "common_test_utils/ov_test_utils.hpp" @@ -19,20 +18,16 @@ TEST(SmartReshapeTests, Reshape1d) { f = std::make_shared(NodeVector{reshape}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE( - network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({5})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({5})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {1, 3, 300, 300}}})); + ASSERT_NO_THROW(f->reshape({{1, 3, 300, 300}})); check_unique_names(f, unh); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({270000})); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3, 300, 300})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({270000})); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3, 300, 300})); } TEST(SmartReshapeTests, Reshape1d_negative) { @@ -44,19 +39,10 @@ TEST(SmartReshapeTests, Reshape1d_negative) { f = std::make_shared(NodeVector{reshape}, ParameterVector{input, pattern}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE( - network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().is_dynamic()); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().is_dynamic()); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {1, 3, 300, 300}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({270000})); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3, 300, 300})); - ASSERT_FALSE(network.getFunction()->get_parameters()[1]->get_output_target_inputs(0).empty()); + EXPECT_ANY_THROW(f->reshape({{1, 3, 300, 300}})); } diff --git a/src/common/transformations/tests/smart_reshape/sr_strided_slice_squeeze.cpp b/src/common/transformations/tests/smart_reshape/sr_strided_slice_squeeze.cpp index 002dc860dfffba..3c9053594ff68c 100644 --- a/src/common/transformations/tests/smart_reshape/sr_strided_slice_squeeze.cpp +++ b/src/common/transformations/tests/smart_reshape/sr_strided_slice_squeeze.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include "common_test_utils/ov_test_utils.hpp" @@ -27,19 +26,13 @@ TEST(SmartReshapeTests, SS_Squeeze) { f = std::make_shared(NodeVector{relu}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({3})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 3})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, SS_Squeeze_partial_begin_end_mask) { @@ -59,21 +52,19 @@ TEST(SmartReshapeTests, SS_Squeeze_partial_begin_end_mask) { f = std::make_shared(NodeVector{relu}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({1, 768})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 128, 768})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({1, 768})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 128, 768})); auto unh = std::make_shared(); init_unique_names(f, unh); - auto inputname = network.getFunction()->get_parameters()[0]->get_friendly_name(); - ASSERT_NO_THROW(network.reshape(InferenceEngine::ICNNNetwork::InputShapes{{inputname, {2, 128, 768}}})); + auto inputname = f->get_parameters()[0]->get_friendly_name(); + ASSERT_NO_THROW(f->reshape({{2, 128, 768}})); check_unique_names(f, unh); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({2, 768})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 128, 768})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({2, 768})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({2, 128, 768})); } TEST(SmartReshapeTests, SS_Squeeze_partial_begin_end) { @@ -95,21 +86,19 @@ TEST(SmartReshapeTests, SS_Squeeze_partial_begin_end) { f = std::make_shared(NodeVector{relu}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({1, 768})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 1, 768})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({1, 768})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 1, 768})); auto unh = std::make_shared(); init_unique_names(f, unh); - auto inputname = network.getFunction()->get_parameters()[0]->get_friendly_name(); - ASSERT_NO_THROW(network.reshape(InferenceEngine::ICNNNetwork::InputShapes{{inputname, {2, 1, 768}}})); + auto inputname = f->get_parameters()[0]->get_friendly_name(); + ASSERT_NO_THROW(f->reshape({{2, 1, 768}})); check_unique_names(f, unh); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({2, 768})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 1, 768})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({2, 768})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({2, 1, 768})); } TEST(SmartReshapeTests, SS_Squeeze_mask_use_negative) { @@ -128,15 +117,13 @@ TEST(SmartReshapeTests, SS_Squeeze_mask_use_negative) { f = std::make_shared(NodeVector{squeeze}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({1, 3})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({1, 3})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_ANY_THROW(network.setBatchSize(2)); + ASSERT_ANY_THROW(set_batch(f, 2)); check_unique_names(f, unh); } @@ -156,15 +143,13 @@ TEST(SmartReshapeTests, SS_Squeeze_negative_stride_negative) { f = std::make_shared(NodeVector{relu}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({3})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_ANY_THROW(network.setBatchSize(2)); + ASSERT_ANY_THROW(set_batch(f, 2)); check_unique_names(f, unh); } @@ -185,20 +170,13 @@ TEST(SmartReshapeTests, SS_SharedSqueezes) { f = std::make_shared(NodeVector{relu_1, relu_2}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({3})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 3})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, SS_SqueezeNegativeAxes) { @@ -218,20 +196,13 @@ TEST(SmartReshapeTests, SS_SqueezeNegativeAxes) { f = std::make_shared(NodeVector{relu}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3, 8, 2})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3, 1, 8, 1, 2})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({3, 8, 2})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3, 1, 8, 1, 2})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3, 8, 2})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 3, 1, 8, 1, 2})); + EXPECT_ANY_THROW(set_batch(f, 2)); } TEST(SmartReshapeTests, Squeeze_SSNegativeAxes) { @@ -250,18 +221,11 @@ TEST(SmartReshapeTests, Squeeze_SSNegativeAxes) { f = std::make_shared(NodeVector{ss}, ParameterVector{input}); } - InferenceEngine::CNNNetwork network(f); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3, 8, 2})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({1, 3, 1, 8, 1, 2})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({3, 8, 2})) + << f->get_results()[0]->get_output_partial_shape(0); + ASSERT_TRUE(f->get_parameters()[0]->get_partial_shape().compatible({1, 3, 1, 8, 1, 2})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.setBatchSize(2)); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({3, 8, 2})) - << network.getFunction()->get_results()[0]->get_output_partial_shape(0); - ASSERT_TRUE(network.getFunction()->get_parameters()[0]->get_partial_shape().compatible({2, 3, 1, 8, 1, 2})); + EXPECT_ANY_THROW(set_batch(f, 2)); } diff --git a/src/common/transformations/tests/smart_reshape/sr_sub_graph_ops.cpp b/src/common/transformations/tests/smart_reshape/sr_sub_graph_ops.cpp index 9ff6aa84ca7419..25c30db2fa4339 100644 --- a/src/common/transformations/tests/smart_reshape/sr_sub_graph_ops.cpp +++ b/src/common/transformations/tests/smart_reshape/sr_sub_graph_ops.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include "common_test_utils/ov_test_utils.hpp" @@ -49,26 +48,17 @@ TEST(SmartReshapeTests, TensorIteratorStaticParameters) { f = std::make_shared(OutputVector{out0, out1, out2, out3}, ParameterVector{X, Y, M}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({})); + ASSERT_TRUE(f->get_results()[1]->get_output_partial_shape(0).compatible({1, 1, 1})); // concat output (seq len = 1, so it means num_iter = 1) - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({1, 1, 1})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[2]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[3]->get_output_partial_shape(0).compatible({1, 1, 1})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, - {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, - {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({32, 1, 10})); - // concat output - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({32, 10, 10})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({32, 1, 1})); + EXPECT_ANY_THROW(f->reshape({{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, + {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, + {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); } TEST(SmartReshapeTests, TensorIteratorDynamicParameters) { @@ -109,26 +99,17 @@ TEST(SmartReshapeTests, TensorIteratorDynamicParameters) { f = std::make_shared(OutputVector{out0, out1, out2, out3}, ParameterVector{X, Y, M}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({})); + ASSERT_TRUE(f->get_results()[1]->get_output_partial_shape(0).compatible({1, 1, 1})); // concat output (seq len = 1, so it means num_iter = 1) - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({1, 1, 1})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[2]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[3]->get_output_partial_shape(0).compatible({1, 1, 1})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, - {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, - {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({32, 1, 10})); - // concat output - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({32, 10, 10})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({32, 1, 1})); + EXPECT_ANY_THROW(f->reshape({{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, + {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, + {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); } TEST(SmartReshapeTests, LoopStaticParameters) { @@ -174,29 +155,17 @@ TEST(SmartReshapeTests, LoopStaticParameters) { f = std::make_shared(OutputVector{out0, out1, out2, out3}, ParameterVector{X, Y, M}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE( - network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({})); + ASSERT_TRUE(f->get_results()[1]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); // concat output - ASSERT_TRUE( - network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); - ASSERT_TRUE( - network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[2]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[3]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, - {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, - {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({32, 1, 10})); - // concat output - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({32, 10, 10})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({32, 1, 1})); + EXPECT_ANY_THROW(f->reshape({{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, + {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, + {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); } TEST(SmartReshapeTests, LoopDynamicParameters) { @@ -242,29 +211,17 @@ TEST(SmartReshapeTests, LoopDynamicParameters) { f = std::make_shared(OutputVector{out0, out1, out2, out3}, ParameterVector{X, Y, M}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE( - network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({})); + ASSERT_TRUE(f->get_results()[1]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); // concat output - ASSERT_TRUE( - network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); - ASSERT_TRUE( - network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[2]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[3]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, - {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, - {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({32, 1, 10})); - // concat output - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({32, 10, 10})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({32, 1, 1})); + EXPECT_ANY_THROW(f->reshape({{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, + {f->get_parameters()[1]->get_friendly_name(), {32, 10, 1}}, + {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); } TEST(SmartReshapeTests, LoopParentParametersUsedInBody) { @@ -314,29 +271,17 @@ TEST(SmartReshapeTests, LoopParentParametersUsedInBody) { f = std::make_shared(OutputVector{out0, out1, out2, out3}, ParameterVector{X, Y, M}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE( - network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({})); + ASSERT_TRUE(f->get_results()[1]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); // concat output - ASSERT_TRUE( - network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); - ASSERT_TRUE( - network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[2]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); + ASSERT_TRUE(f->get_results()[3]->get_output_partial_shape(0).compatible(PartialShape::dynamic())); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {4, 3, 2}}, - {f->get_parameters()[1]->get_friendly_name(), {4, 3, 2}}, - {f->get_parameters()[2]->get_friendly_name(), {4, 3, 2}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({4, 3, 2})); - // concat output - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({4, 30, 2})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({4, 3, 2})); + EXPECT_ANY_THROW(f->reshape({{f->get_parameters()[0]->get_friendly_name(), {4, 3, 2}}, + {f->get_parameters()[1]->get_friendly_name(), {4, 3, 2}}, + {f->get_parameters()[2]->get_friendly_name(), {4, 3, 2}}})); } TEST(SmartReshapeTests, TensorIteratorParentParameterUsedInBody) { @@ -381,24 +326,15 @@ TEST(SmartReshapeTests, TensorIteratorParentParameterUsedInBody) { f = std::make_shared(OutputVector{out0, out1, out2, out3}, ParameterVector{X, Y, M}); } - InferenceEngine::CNNNetwork network(f); - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[0]->get_output_partial_shape(0).compatible({})); + ASSERT_TRUE(f->get_results()[1]->get_output_partial_shape(0).compatible({1, 1, 1})); // concat output (seq len = 1, so it means num_iter = 1) - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({1, 1, 1})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[2]->get_output_partial_shape(0).compatible({1, 1, 1})); + ASSERT_TRUE(f->get_results()[3]->get_output_partial_shape(0).compatible({1, 1, 1})); auto unh = std::make_shared(); init_unique_names(f, unh); - ASSERT_NO_THROW(network.reshape( - InferenceEngine::ICNNNetwork::InputShapes{{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, - {f->get_parameters()[1]->get_friendly_name(), {1, 1, 1}}, - {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); - check_unique_names(f, unh); - - ASSERT_TRUE(network.getFunction()->get_results()[0]->get_output_partial_shape(0).compatible({})); - ASSERT_TRUE(network.getFunction()->get_results()[1]->get_output_partial_shape(0).compatible({32, 1, 10})); - // concat output - ASSERT_TRUE(network.getFunction()->get_results()[2]->get_output_partial_shape(0).compatible({32, 10, 10})); - ASSERT_TRUE(network.getFunction()->get_results()[3]->get_output_partial_shape(0).compatible({32, 1, 1})); + EXPECT_ANY_THROW(f->reshape({{f->get_parameters()[0]->get_friendly_name(), {32, 1, 10}}, + {f->get_parameters()[1]->get_friendly_name(), {1, 1, 1}}, + {f->get_parameters()[2]->get_friendly_name(), {32, 1, 10}}})); } diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 6c0da965f9bfad..1dac080461d16b 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -1188,7 +1188,7 @@ void constant_convert_test(element::Type type_from, } ASSERT_TRUE(actual.size() >= expected.size()); for (size_t i = 0; i < expected.size(); i++) { - ASSERT_EQ(expected[i], actual[i]); + EXPECT_EQ(expected[i], actual[i]) << "Elements with index " << i << " are not equal."; } } @@ -1378,7 +1378,7 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) { constant_convert_test(element::u1, element::u4, std::vector{171}, - {1, 0, 1, 0, 1, 0, 1, 1}); + {0, 1, 0, 1, 0, 1, 1, 1}); } TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) { diff --git a/src/common/transformations/tests/utils/primitives_priority_test.cpp b/src/common/transformations/tests/utils/primitives_priority_test.cpp index a748477e2b9137..64f6330a1da188 100644 --- a/src/common/transformations/tests/utils/primitives_priority_test.cpp +++ b/src/common/transformations/tests/utils/primitives_priority_test.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include @@ -22,8 +21,6 @@ using namespace ov; using namespace testing; -using namespace InferenceEngine; -using namespace InferenceEngine::details; TEST(TransformationTests, ConvBiasFusion) { std::shared_ptr f(nullptr); @@ -46,12 +43,7 @@ TEST(TransformationTests, ConvBiasFusion) { std::unordered_map pp; - InferenceEngine::CNNNetwork network(f); - - // Set PrimitivesPriority to all Convolutions - auto model = network.getFunction(); - ASSERT_NE(nullptr, model); - for (auto& op : model->get_ops()) { + for (auto& op : f->get_ops()) { if (auto conv = std::dynamic_pointer_cast(op)) { auto& rtInfo = conv->get_rt_info(); rtInfo[ov::PrimitivesPriority::get_type_info_static()] = ov::PrimitivesPriority("test"); @@ -59,8 +51,7 @@ TEST(TransformationTests, ConvBiasFusion) { } } - auto clonedNetwork = InferenceEngine::details::cloneNetwork(network); - auto funcs = clonedNetwork.getFunction(); + auto funcs = f->clone(); for (auto& op : funcs->get_ops()) { if (auto conv = std::dynamic_pointer_cast(op)) { diff --git a/src/common/util/CMakeLists.txt b/src/common/util/CMakeLists.txt index faaab5c26d22bc..49f9d1e19cf163 100644 --- a/src/common/util/CMakeLists.txt +++ b/src/common/util/CMakeLists.txt @@ -4,11 +4,11 @@ set(TARGET_NAME openvino_util) -file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB_RECURSE PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) - set(UTIL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${UTIL_INCLUDE_DIR}/*.hpp) + if (WIN32) # Remove linux specific files file(GLOB_RECURSE LIN_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/*.cpp @@ -41,13 +41,15 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${CMAKE_DL_LIBS}) if (WIN32) target_link_libraries(${TARGET_NAME} PRIVATE Shlwapi) endif() -target_include_directories(${TARGET_NAME} PUBLIC - $) - -ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) +target_include_directories(${TARGET_NAME} PUBLIC $) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME} SOURCE_DIRECTORIES ${UTIL_INCLUDE_DIR}) -openvino_developer_export_targets(COMPONENT core TARGETS ${TARGET_NAME}) +# install & export + +ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${UTIL_INCLUDE_DIR}/") diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 869b9a02c49272..d389c1862703bf 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -36,25 +36,29 @@ source_group("src" FILES ${LIBRARY_SRC}) source_group("include" FILES ${PUBLIC_HEADERS}) # -# Create ov_core_dev library +# Create openvino_core_dev library # -add_library(ov_core_dev INTERFACE) -add_library(openvino::core::dev ALIAS ov_core_dev) +add_library(openvino_core_dev INTERFACE) +add_library(openvino::core::dev ALIAS openvino_core_dev) -target_include_directories(ov_core_dev INTERFACE +target_include_directories(openvino_core_dev INTERFACE $ $ $ $) -target_link_libraries(ov_core_dev INTERFACE openvino::itt openvino::util) +target_link_libraries(openvino_core_dev INTERFACE openvino::itt openvino::util) -set_target_properties(ov_core_dev PROPERTIES EXPORT_NAME core::dev) -openvino_developer_export_targets(COMPONENT core TARGETS openvino::core::dev) +set_target_properties(openvino_core_dev PROPERTIES EXPORT_NAME core::dev) +ov_developer_package_export_targets(TARGET openvino::core::dev + INSTALL_INCLUDE_DIRECTORIES + "${OV_CORE_DEV_API_PATH}/" + "${OpenVINO_SOURCE_DIR}/src/common/transformations/include/" + "${OpenVINO_SOURCE_DIR}/src/common/low_precision_transformations/include/") # Install interface libraries for case BUILD_SHARED_LIBS=OFF -ov_install_static_lib(ov_core_dev ${OV_CPACK_COMP_CORE}) +ov_install_static_lib(openvino_core_dev ${OV_CPACK_COMP_CORE}) # Fix error LNK1248: image size (...) exceeds maximum allowable size (FFFFFFFF) # the symbolic debugging information will be stored in a separate .pdb file. diff --git a/src/core/builder/CMakeLists.txt b/src/core/builder/CMakeLists.txt index 64ce45a4870921..ee87ece0365d60 100644 --- a/src/core/builder/CMakeLists.txt +++ b/src/core/builder/CMakeLists.txt @@ -4,11 +4,11 @@ set(TARGET_NAME "openvino_builders") -file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB_RECURSE PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) - set(BUILDER_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${BUILDER_INCLUDE_DIR}/*.hpp) + # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj @@ -35,8 +35,9 @@ endif() ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) -ov_install_static_lib(openvino_builders ${OV_CPACK_COMP_CORE}) +# install & export -# developer package +ov_install_static_lib(openvino_builders ${OV_CPACK_COMP_CORE}) -openvino_developer_export_targets(COMPONENT core TARGETS openvino::builders) +ov_developer_package_export_targets(TARGET openvino::builders + INSTALL_INCLUDE_DIRECTORIES "${BUILDER_INCLUDE_DIR}/") diff --git a/src/core/dev_api/validation_util.hpp b/src/core/dev_api/validation_util.hpp index c214b404798a9c..e93fefd1411eb9 100644 --- a/src/core/dev_api/validation_util.hpp +++ b/src/core/dev_api/validation_util.hpp @@ -78,5 +78,10 @@ bool try_apply_auto_padding(const PartialShape& image_shape, CoordinateDiff& padding_above, CoordinateDiff& padding_below); +/// @brief Get the tensors shapes as ov::PartialShape. +/// +/// @param tensors Input tensors vector to get their shapes. +/// @return Vector of partial shapes same size as input tensor vector. +OPENVINO_API std::vector get_tensors_partial_shapes(const TensorVector& tensors); } // namespace util } // namespace ov diff --git a/src/core/include/openvino/core/node.hpp b/src/core/include/openvino/core/node.hpp index 860290617709a7..ac1c61ce0f18d3 100644 --- a/src/core/include/openvino/core/node.hpp +++ b/src/core/include/openvino/core/node.hpp @@ -554,21 +554,17 @@ OPENVINO_API void NodeValidationFailure::create(const CheckLocInfo& check_loc_in NODE_VALIDATION_CHECK(std::make_pair(static_cast((node)), &(input_shapes)), __VA_ARGS__) namespace ov { -template -void check_new_args_count(const Node* node, T new_args) { - NODE_VALIDATION_CHECK(node, - new_args.size() == node->input_values().size(), - "clone_with_new_inputs() expected ", - node->input_values().size(), - " argument", - (node->input_values().size() == 1 ? "" : "s"), - " but got ", - new_args.size()); -} -} // namespace ov +/** + * @brief Check new arguments size if match node inputs count. + * + * This check is required in cloning ov::Node. + * + * @param node Pointer to node. + * @param new_args Vector with new outputs to check. + */ +void OPENVINO_API check_new_args_count(const Node* const node, const OutputVector& new_args); -namespace ov { /// \brief Visits a reference to a node that has been registered with the visitor. template <> class OPENVINO_API AttributeAdapter> : public VisitorAdapter { diff --git a/src/core/include/openvino/core/shape.hpp b/src/core/include/openvino/core/shape.hpp index 392bd9c48bd9d5..a04a864a8394fb 100644 --- a/src/core/include/openvino/core/shape.hpp +++ b/src/core/include/openvino/core/shape.hpp @@ -42,19 +42,6 @@ class Shape : public std::vector { OPENVINO_API std::string to_string() const; }; -/** - * @brief Number of elements in spanned by a shape - * @ingroup ov_model_cpp_api - */ -template -size_t shape_size(const SHAPE_TYPE& shape) { - size_t size = 1; - for (auto d : shape) { - size *= d; - } - return size; -} - /** * Number of elements in a subset of dimensions of a shape. * Returns a product of dimensions in a range [start_dim;end_dim) @@ -72,6 +59,15 @@ size_t shape_size(ForwardIt start_dim, const ForwardIt end_dim) { std::multiplies::value_type>()); } +/** + * @brief Number of elements in spanned by a shape + * @ingroup ov_model_cpp_api + */ +template +size_t shape_size(const SHAPE_TYPE& shape) { + return shape_size(shape.begin(), shape.end()); +} + /// Row-major strides for a shape template std::vector row_major_strides(const SHAPE_TYPE& shape) { diff --git a/src/core/include/openvino/op/batch_to_space.hpp b/src/core/include/openvino/op/batch_to_space.hpp index 6609e539087628..2dbbf018913fd3 100644 --- a/src/core/include/openvino/op/batch_to_space.hpp +++ b/src/core/include/openvino/op/batch_to_space.hpp @@ -37,9 +37,7 @@ class OPENVINO_API BatchToSpace : public Op { const Output& block_shape, const Output& crops_begin, const Output& crops_end); - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; void validate_and_infer_types() override; diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 14ee7b3313490e..e122d36a8223d1 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -426,7 +426,7 @@ class OPENVINO_API Constant : public Op { typename StorageDataType = fundamental_type_for, typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - return (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + return (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; } template , typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; return data; @@ -530,7 +530,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t data = (c >> i) & 0x0F; output.push_back(data); } @@ -548,7 +548,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t i4data = (c >> i) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; @@ -663,27 +663,9 @@ class OPENVINO_API Constant : public Op { template , - typename std::enable_if::type = true> - void write_buffer(const std::vector& source) { - auto p = get_data_ptr_nc(); - size_t i = 0; - for (; i < source.size() / 2; i++) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v2 = value_in_range(source[i * 2 + 1]) & 0x0F; - const auto v = (v1 << 4) | v2; - p[i] = static_cast(v); - } - if (source.size() % 2) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v = v1 << 4; - p[i] = static_cast(v); - } - } - - template , - typename std::enable_if::value, bool>::type = true> + typename std::enable_if::value), + bool>::type = true> void write_buffer(const std::vector& source) { auto p = get_data_ptr_nc(); size_t i = 0; diff --git a/src/core/include/openvino/op/logical_and.hpp b/src/core/include/openvino/op/logical_and.hpp index 6d55f8f3585e0f..382679d16b78e1 100644 --- a/src/core/include/openvino/op/logical_and.hpp +++ b/src/core/include/openvino/op/logical_and.hpp @@ -35,10 +35,7 @@ class OPENVINO_API LogicalAnd : public util::BinaryElementwiseLogical { const AutoBroadcastSpec& auto_broadcast = AutoBroadcastSpec(AutoBroadcastType::NUMPY)); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - bool visit_attributes(AttributeVisitor& visitor) override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/logical_not.hpp b/src/core/include/openvino/op/logical_not.hpp index c5421b8db14a47..052aed0a09ad24 100644 --- a/src/core/include/openvino/op/logical_not.hpp +++ b/src/core/include/openvino/op/logical_not.hpp @@ -24,9 +24,7 @@ class OPENVINO_API LogicalNot : public Op { void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/logical_or.hpp b/src/core/include/openvino/op/logical_or.hpp index 15c00eea04baf3..1dab36217b175a 100644 --- a/src/core/include/openvino/op/logical_or.hpp +++ b/src/core/include/openvino/op/logical_or.hpp @@ -34,9 +34,7 @@ class OPENVINO_API LogicalOr : public util::BinaryElementwiseLogical { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/max_pool.hpp b/src/core/include/openvino/op/max_pool.hpp index c1741eef6cb717..534f8b1d067397 100644 --- a/src/core/include/openvino/op/max_pool.hpp +++ b/src/core/include/openvino/op/max_pool.hpp @@ -43,13 +43,8 @@ class OPENVINO_API MaxPool : public op::util::MaxPoolBase { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - -private: - bool evaluate_maxpool(const HostTensorVector& outputs, const HostTensorVector& inputs) const; }; } // namespace v1 @@ -119,10 +114,8 @@ class OPENVINO_API MaxPool : public op::util::MaxPoolBase { m_axis = axis; } + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector&, const HostTensorVector&) const override; - OPENVINO_SUPPRESS_DEPRECATED_END private: Strides m_dilations; diff --git a/src/core/include/openvino/op/split.hpp b/src/core/include/openvino/op/split.hpp index 918457c0d84a05..6137f0591cfba1 100644 --- a/src/core/include/openvino/op/split.hpp +++ b/src/core/include/openvino/op/split.hpp @@ -39,9 +39,8 @@ class OPENVINO_API Split : public Op { void set_num_splits(const size_t num_splits) { m_num_splits = num_splits; } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; diff --git a/src/core/include/openvino/op/squeeze.hpp b/src/core/include/openvino/op/squeeze.hpp index 28f098be406bf7..e66cfb3d27667a 100644 --- a/src/core/include/openvino/op/squeeze.hpp +++ b/src/core/include/openvino/op/squeeze.hpp @@ -20,11 +20,8 @@ class OPENVINO_API Squeeze : public Op { Squeeze(const Output& data, const Output& axes); Squeeze(const Output& data); - bool visit_attributes(AttributeVisitor& visitor) override; void validate_and_infer_types() override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; diff --git a/src/core/include/openvino/op/topk.hpp b/src/core/include/openvino/op/topk.hpp index 9c2ec7a9ce1492..cfc6ccd6cc5462 100644 --- a/src/core/include/openvino/op/topk.hpp +++ b/src/core/include/openvino/op/topk.hpp @@ -36,7 +36,7 @@ class OPENVINO_API TopK : public util::TopKBase { /// the biggest element of two. /// \param sort Specifies order of output elements and/or indices /// Accepted values: none, index, value - /// \param index_element_type Specyfies type of produced indices + /// \param index_element_type Specifies type of produced indices TopK(const Output& data, const Output& k, const int64_t axis, @@ -53,9 +53,7 @@ class OPENVINO_API TopK : public util::TopKBase { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; protected: @@ -83,7 +81,7 @@ class OPENVINO_API TopK : public util::TopKBase { /// the biggest element of two. /// \param sort Specifies order of output elements and/or indices /// Accepted values: none, index, value - /// \param index_element_type Specyfies type of produced indices + /// \param index_element_type Specifies type of produced indices TopK(const Output& data, const Output& k, const int64_t axis, @@ -99,9 +97,7 @@ class OPENVINO_API TopK : public util::TopKBase { const element::Type& index_element_type = element::i32); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v3 @@ -153,9 +149,7 @@ class OPENVINO_API TopK : public util::TopKBase { bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; bool get_stable() const { diff --git a/src/core/include/openvino/op/util/evaluate_helpers.hpp b/src/core/include/openvino/op/util/evaluate_helpers.hpp deleted file mode 100644 index 616528adf60d08..00000000000000 --- a/src/core/include/openvino/op/util/evaluate_helpers.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/core/partial_shape.hpp" -#include "openvino/runtime/tensor.hpp" - -namespace ov { -namespace op { -namespace util { - -/** - * @brief Get the tensors shapes as ov::PartialShape. - * - * @param tensors Input tensors vector to get its shapes. - * @return Vector of partial shapes sam size as input tensor vector. - */ -std::vector get_tensors_partial_shapes(const TensorVector& tensors); -} // namespace util -} // namespace op -} // namespace ov diff --git a/src/core/include/openvino/op/variadic_split.hpp b/src/core/include/openvino/op/variadic_split.hpp index 2d6f751d48d3ba..49cb5dcc714502 100644 --- a/src/core/include/openvino/op/variadic_split.hpp +++ b/src/core/include/openvino/op/variadic_split.hpp @@ -29,25 +29,17 @@ class OPENVINO_API VariadicSplit : public Op { /// outputs. The sum of split_lengths must match data.shape[axis] VariadicSplit(const Output& data, const Output& axis, const Output& split_lengths); - bool visit_attributes(AttributeVisitor& visitor) override; - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; size_t get_default_output_index() const override { return no_default_index(); } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; bool evaluate_label(TensorLabelVector& output_labels) const override; - -private: - bool evaluate_variadic_split(const HostTensorVector& outputs, const HostTensorVector& inputs) const; - bool has_axis_and_splits_bound_set() const; }; } // namespace v1 } // namespace op diff --git a/src/core/reference/CMakeLists.txt b/src/core/reference/CMakeLists.txt index 4154a1455ffef0..e868c07c391e96 100644 --- a/src/core/reference/CMakeLists.txt +++ b/src/core/reference/CMakeLists.txt @@ -4,11 +4,11 @@ set(TARGET_NAME "openvino_reference") -file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB_RECURSE PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) - set(REF_IMPL_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${REF_IMPL_INCLUDE_DIR}/*.hpp) + # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj @@ -46,7 +46,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE Threads::Threads openvino::core::de ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) +# install & export + ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) -# developer package -openvino_developer_export_targets(COMPONENT core TARGETS openvino::reference) +ov_developer_package_export_targets(TARGET openvino::reference + INSTALL_INCLUDE_DIRECTORIES "${REF_IMPL_INCLUDE_DIR}/") diff --git a/src/core/reference/include/openvino/reference/and.hpp b/src/core/reference/include/openvino/reference/and.hpp index 326e4b59d773af..8f43b045d6398c 100644 --- a/src/core/reference/include/openvino/reference/and.hpp +++ b/src/core/reference/include/openvino/reference/and.hpp @@ -4,31 +4,37 @@ #pragma once -#include +#include +#include #include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" #include "openvino/reference/autobroadcast_binop.hpp" namespace ov { namespace reference { -template +template void logical_and(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(arg0[i] && arg1[i]); - } + std::transform(arg0, std::next(arg0, count), arg1, out, std::logical_and()); } -template +/** + * @brief Reference implementation of binary elementwise LogicalAnd operator. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg_shape0 Input 0 shape. + * @param arg_shape1 Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ +template void logical_and(const T* arg0, const T* arg1, T* out, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return static_cast(x && y); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, std::logical_and()); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/convert.hpp b/src/core/reference/include/openvino/reference/convert.hpp index e943e548a8fa4e..bd36b50b03301d 100644 --- a/src/core/reference/include/openvino/reference/convert.hpp +++ b/src/core/reference/include/openvino/reference/convert.hpp @@ -14,7 +14,7 @@ namespace reference { namespace detail { inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits if (val) { buf[byte_idx] |= (1 << bit_idx); } else { @@ -24,33 +24,33 @@ inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { inline uint8_t get_u1(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits return (buf[byte_idx] & (1 << bit_idx)) ? 1 : 0; } inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline uint8_t get_u4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); return (buf[byte_idx] >> bit_shift) & 0xF; } inline void set_i4(uint8_t* buf, size_t idx, int8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline int8_t get_i4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); uint8_t val = (buf[byte_idx] >> bit_shift) & 0xF; if (val & 0x08) { // negative number val |= 0xF0; diff --git a/src/core/reference/include/openvino/reference/logical_not.hpp b/src/core/reference/include/openvino/reference/logical_not.hpp new file mode 100644 index 00000000000000..ca31a824b50d5f --- /dev/null +++ b/src/core/reference/include/openvino/reference/logical_not.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ov { +namespace reference { + +/** + * @brief Reference implementation of LogicalNot operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. + */ +template +void logical_not(const T* arg, T* out, const size_t count) { + std::transform(arg, std::next(arg, count), out, std::logical_not()); +} +} // namespace reference +} // namespace ov diff --git a/src/core/reference/include/openvino/reference/not.hpp b/src/core/reference/include/openvino/reference/not.hpp deleted file mode 100644 index e0444a8eb73a2a..00000000000000 --- a/src/core/reference/include/openvino/reference/not.hpp +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace ov { -namespace reference { -template -void logical_not(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(!(arg[i])); - } -} -} // namespace reference -} // namespace ov diff --git a/src/core/reference/include/openvino/reference/or.hpp b/src/core/reference/include/openvino/reference/or.hpp index 7e821de63e3c03..4b0d760ec41349 100644 --- a/src/core/reference/include/openvino/reference/or.hpp +++ b/src/core/reference/include/openvino/reference/or.hpp @@ -4,31 +4,38 @@ #pragma once -#include +#include +#include #include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" #include "openvino/reference/autobroadcast_binop.hpp" namespace ov { namespace reference { -template -void logical_or(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(arg0[i] || arg1[i]); - } + +template +void logical_or(const T* arg0, const T* arg1, T* out, const size_t count) { + std::transform(arg0, std::next(arg0, count), arg1, out, std::logical_or()); } -template +/** + * @brief Reference implementation of binary elementwise LogicalOr operator. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg_shape0 Input 0 shape. + * @param arg_shape1 Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ +template void logical_or(const T* arg0, const T* arg1, T* out, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return static_cast(x || y); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, std::logical_or()); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/split.hpp b/src/core/reference/include/openvino/reference/split.hpp index dcbede1883a409..6e3564ed035bb1 100644 --- a/src/core/reference/include/openvino/reference/split.hpp +++ b/src/core/reference/include/openvino/reference/split.hpp @@ -4,17 +4,28 @@ #pragma once -#include +#include -#include "openvino/reference/slice.hpp" +#include "openvino/core/shape.hpp" namespace ov { namespace reference { + +/** + * @brief Reference implementation of Split operator. + * + * @param data Pointer to input data. + * @param data_shape Input data shape. + * @param elem_size Size of single element type. + * @param axis Axis used for split input data. + * @param num_splits Number of splits + * @param out_data Pointer to output data pointers (must have size of num_splits) + */ void split(const char* data, const Shape& data_shape, size_t elem_size, int64_t axis, size_t num_splits, char** out_data); -} +} // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/topk.hpp b/src/core/reference/include/openvino/reference/topk.hpp index c84fb54e9962bb..76ce901eb27f9a 100644 --- a/src/core/reference/include/openvino/reference/topk.hpp +++ b/src/core/reference/include/openvino/reference/topk.hpp @@ -8,7 +8,7 @@ #include #include -#include "openvino/op/topk.hpp" +#include "openvino/op/util/attr_types.hpp" #include "openvino/reference/utils/coordinate_index.hpp" #include "openvino/reference/utils/coordinate_transform.hpp" @@ -17,23 +17,11 @@ namespace reference { // This used to be lambda expressions but MSVC had difficulty compiling it. This way is more explicit. template inline bool compare_max(const std::tuple& a, const std::tuple& b) { -// this is intentional to be able to compare floats directly -// without using relative or absolute tolerance -#if defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wfloat-equal" -#endif - if (std::get<0>(a) == std::get<0>(b)) { + if (std::get<0>(a) != std::get<0>(b)) { + return D ? std::get<0>(a) > std::get<0>(b) : std::get<0>(a) < std::get<0>(b); + } else { return std::get<1>(a) < std::get<1>(b); } -#if defined(__GNUC__) -# pragma GCC diagnostic pop -#endif - - if (D) - return std::get<0>(a) > std::get<0>(b); - else - return std::get<0>(a) < std::get<0>(b); } template @@ -41,63 +29,76 @@ inline bool compare_indices_ascending(const std::tuple& a, const std::tupl return std::get<1>(a) < std::get<1>(b); } -// TopK reference implementation provides stable indices output +/** + * @brief Reference implementation fo TopK operator + * + * @param arg Pointer to input data. + * @param out_indices Pointer to output indicies. + * @param out_values Pointer to output values. + * @param in_shape Input data shape. + * @param out_shape Output data (values, indicies) shape. + * @param axis Axis for search of top K elements. + * @param k Number to find of top elements. + * @param compute_max Select mode of find max or min. + * @param sort Sorting type. + */ template void topk(const T* arg, U* out_indices, T* out_values, const Shape& in_shape, const Shape& out_shape, - size_t axis, - size_t k, - bool compute_max, - op::TopKSortType sort = op::TopKSortType::NONE) { - using namespace std; + const size_t axis, + const size_t k, + const bool compute_max, + const op::TopKSortType sort = op::TopKSortType::NONE) { // Create temp vector for sorting. - vector> workspace(in_shape[axis]); - vector in_strides = row_major_strides(in_shape); - vector out_strides = row_major_strides(out_shape); - auto in_axis_stride = in_strides[axis]; - auto out_axis_stride = out_strides[axis]; + std::vector> workspace(in_shape[axis]); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + const auto in_axis_stride = in_strides[axis]; + const auto out_axis_stride = out_strides[axis]; // Iterate over elements with 0 index at "axis" dimension auto traverse_shape = in_shape; traverse_shape[axis] = 1; CoordinateTransformBasic traverse_transform(traverse_shape); - for (const Coordinate& coord : traverse_transform) { + for (const auto& coord : traverse_transform) { auto arg_index = coordinate_index(coord, in_shape); auto out_index = coordinate_index(coord, out_shape); // Fill the temp vector U i = 0; - for (tuple& entry : workspace) { - get<0>(entry) = arg[arg_index]; - get<1>(entry) = i; + for (auto& entry : workspace) { + std::get<0>(entry) = arg[arg_index]; + std::get<1>(entry) = i; arg_index += in_axis_stride; - i++; - } - // Sort the temp vector - if (compute_max) { - nth_element(workspace.begin(), workspace.begin() + k, workspace.end(), compare_max); - } else { - nth_element(workspace.begin(), workspace.begin() + k, workspace.end(), compare_max); + ++i; } - // Write temp vector to output + + const auto cmp_func = compute_max ? compare_max : compare_max; + + typename std::decay::type sort_func; switch (sort) { - case op::TopKSortType::NONE: - break; case op::TopKSortType::SORT_INDICES: - std::sort(workspace.begin(), workspace.begin() + k, compare_indices_ascending); + sort_func = compare_indices_ascending; break; case op::TopKSortType::SORT_VALUES: - if (compute_max) - std::sort(workspace.begin(), workspace.begin() + k, compare_max); - else - std::sort(workspace.begin(), workspace.begin() + k, compare_max); + sort_func = cmp_func; + break; + default: + sort_func = nullptr; + break; } - for (size_t j = 0; j < k; j++) { + + std::nth_element(workspace.begin(), workspace.begin() + k, workspace.end(), cmp_func); + if (sort_func) { + std::sort(workspace.begin(), workspace.begin() + k, sort_func); + } + + for (size_t j = 0; j < k; ++j) { const auto& entry = workspace[j]; - out_values[out_index] = get<0>(entry); - out_indices[out_index] = get<1>(entry); + out_values[out_index] = std::get<0>(entry); + out_indices[out_index] = std::get<1>(entry); out_index += out_axis_stride; } } diff --git a/src/core/reference/src/op/split.cpp b/src/core/reference/src/op/split.cpp index 6186bdd5af941d..855fc29c4a1be9 100644 --- a/src/core/reference/src/op/split.cpp +++ b/src/core/reference/src/op/split.cpp @@ -6,35 +6,43 @@ #include -#include +#include -using namespace ov; +#include "openvino/core/coordinate.hpp" +#include "openvino/reference/slice.hpp" -void reference::split(const char* data, - const Shape& data_shape, - size_t elem_size, - int64_t axis, - size_t num_splits, - char** out_data) { +namespace ov { +namespace reference { + +void split(const char* data, + const Shape& data_shape, + const size_t elem_size, + const int64_t axis, + const size_t num_splits, + char** out_data) { const size_t part_length = data_shape.at(axis) / num_splits; - Shape output_shape = data_shape; - output_shape.at(axis) = part_length; + auto output_shape = data_shape; + output_shape[axis] = part_length; - std::vector lower_bounds(data_shape.size(), 0); - std::vector upper_bounds = data_shape; - upper_bounds.at(axis) = part_length; + Coordinate lower_bounds(data_shape.size(), 0); + Coordinate upper_bounds = output_shape; + auto& lb_at_axis = lower_bounds[axis]; + auto& ub_at_axis = upper_bounds[axis]; - for (size_t i = 0; i < num_splits; ++i) { + const auto out_last = std::next(out_data, num_splits); + for (auto out_first = out_data; out_first != out_last; ++out_first) { reference::slice(data, - out_data[i], + *out_first, data_shape, lower_bounds, upper_bounds, Strides(lower_bounds.size(), 1), output_shape, elem_size); - lower_bounds.at(axis) += part_length; - upper_bounds.at(axis) += part_length; + lb_at_axis += part_length; + ub_at_axis += part_length; } } +} // namespace reference +} // namespace ov diff --git a/src/core/shape_inference/CMakeLists.txt b/src/core/shape_inference/CMakeLists.txt index b04f0cf8573b85..db862ac520d0b5 100644 --- a/src/core/shape_inference/CMakeLists.txt +++ b/src/core/shape_inference/CMakeLists.txt @@ -4,11 +4,11 @@ set(TARGET_NAME "openvino_shape_inference") -file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB_RECURSE PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) - set(SHAPE_INFER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${SHAPE_INFER_INCLUDE_DIR}/*.hpp) + # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj @@ -24,7 +24,7 @@ set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME shape_inference) target_include_directories(${TARGET_NAME} PUBLIC $ $ - $>) + $) ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) @@ -32,7 +32,9 @@ if(NOT BUILD_SHARED_LIBS) target_compile_definitions(${TARGET_NAME} PUBLIC OPENVINO_STATIC_LIBRARY) endif() +# developer package + ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) -# developer package -openvino_developer_export_targets(COMPONENT core TARGETS ${TARGET_NAME}) +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${SHAPE_INFER_INCLUDE_DIR}/") diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp index cf3dc5bf21e3da..1b1093b871c657 100644 --- a/src/core/src/bound_evaluate.cpp +++ b/src/core/src/bound_evaluate.cpp @@ -479,13 +479,13 @@ bool ov::interval_bound_evaluator(const Node* node, return fully_defined; } -bool ov::tensor_is_positive(const Tensor& bound) { +bool ov::tensor_is_non_negative(const Tensor& bound) { const auto bound_constant = std::make_shared(bound.get_element_type(), bound.get_shape(), bound.data()); const auto zero_constant = op::v0::Constant::create(bound.get_element_type(), {1}, {0}); OutputVector greater(1); - bool folded = std::make_shared(bound_constant, zero_constant) + bool folded = std::make_shared(bound_constant, zero_constant) ->constant_fold(greater, {bound_constant, zero_constant}); OPENVINO_ASSERT(folded); @@ -500,6 +500,50 @@ bool ov::tensor_is_positive(const Tensor& bound) { return std::dynamic_pointer_cast(all[0].get_node_shared_ptr())->cast_vector()[0]; } +bool ov::tensor_has_max_value(const Tensor& bound) { + const auto bound_constant = + std::make_shared(bound.get_element_type(), bound.get_shape(), bound.data()); + OPENVINO_SUPPRESS_DEPRECATED_START + auto max_constant = ngraph::get_constant_max_of_type(bound.get_element_type()); + OPENVINO_SUPPRESS_DEPRECATED_END + OutputVector equal(1); + + bool folded = std::make_shared(bound_constant, max_constant) + ->constant_fold(equal, {bound_constant, max_constant}); + OPENVINO_ASSERT(folded); + + auto axes_vector = std::vector(equal[0].get_shape().size()); + std::iota(axes_vector.begin(), axes_vector.end(), 0); + const auto axes = op::v0::Constant::create(element::i64, {axes_vector.size()}, axes_vector); + + OutputVector all(1); + folded = std::make_shared(equal[0], axes)->constant_fold(all, {equal[0], axes}); + OPENVINO_ASSERT(folded && ov::is_type(all[0].get_node_shared_ptr())); + OPENVINO_ASSERT(all[0].get_shape() == Shape{}); + return std::dynamic_pointer_cast(all[0].get_node_shared_ptr())->cast_vector()[0]; +} + +bool ov::tensor_has_zero_value(const Tensor& bound) { + const auto bound_constant = + std::make_shared(bound.get_element_type(), bound.get_shape(), bound.data()); + const auto zero_constant = op::v0::Constant::create(bound.get_element_type(), {1}, {0}); + OutputVector equal(1); + + bool folded = std::make_shared(bound_constant, zero_constant) + ->constant_fold(equal, {bound_constant, zero_constant}); + OPENVINO_ASSERT(folded); + + auto axes_vector = std::vector(equal[0].get_shape().size()); + std::iota(axes_vector.begin(), axes_vector.end(), 0); + const auto axes = op::v0::Constant::create(element::i64, {axes_vector.size()}, axes_vector); + + OutputVector all(1); + folded = std::make_shared(equal[0], axes)->constant_fold(all, {equal[0], axes}); + OPENVINO_ASSERT(folded && ov::is_type(all[0].get_node_shared_ptr())); + OPENVINO_ASSERT(all[0].get_shape() == Shape{}); + return std::dynamic_pointer_cast(all[0].get_node_shared_ptr())->cast_vector()[0]; +} + bool ov::has_and_set_equal_bounds(const Output& source) { if (op::util::is_constant(source.get_node_shared_ptr())) return true; @@ -509,7 +553,7 @@ bool ov::has_and_set_equal_bounds(const Output& source) { } bool ov::have_node_inputs_bounds_set(const Node* const node, const size_t first_idx, const size_t last_idx) { - bool have_bound_set = last_idx <= node->get_input_size(); + bool have_bound_set = last_idx < node->get_input_size(); for (size_t i = first_idx; have_bound_set && (i <= last_idx); ++i) { have_bound_set = node->get_input_tensor(i).has_and_set_bound(); } diff --git a/src/core/src/bound_evaluate.hpp b/src/core/src/bound_evaluate.hpp index 297f69d661f131..952343a7d0076b 100644 --- a/src/core/src/bound_evaluate.hpp +++ b/src/core/src/bound_evaluate.hpp @@ -9,8 +9,14 @@ namespace ov { // bool could_propagate(const Output& output, std::vector& order); -/// \brief Checks if all the elements of the bound Tensor are positive -bool tensor_is_positive(const Tensor& bound); +/// \brief Checks if all the elements of the bound Tensor are non-negative +bool tensor_is_non_negative(const Tensor& bound); + +/// \brief Checks if any element of the bound Tensor has max possible value +bool tensor_has_max_value(const Tensor& bound); + +/// \brief Checks if any element of the bound Tensor has zero value +bool tensor_has_zero_value(const Tensor& bound); /// \brief Estimates upper bound for node output tensors using only upper bounds of the nodes /// inputs. diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index ee2c454bb6a235..492f0dec1e3a04 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -844,6 +844,17 @@ bool ov::Node::visit_attributes(AttributeVisitor&) { } namespace ov { +void check_new_args_count(const Node* const node, const OutputVector& new_args) { + NODE_VALIDATION_CHECK(node, + new_args.size() == node->input_values().size(), + "clone_with_new_inputs() expected ", + node->input_values().size(), + " argument", + (node->input_values().size() == 1 ? "" : "s"), + " but got ", + new_args.size()); +} + AttributeAdapter>::AttributeAdapter(std::shared_ptr& value) : m_ref(value) {} bool AttributeAdapter>::visit_attributes(AttributeVisitor& visitor) { diff --git a/src/core/src/op/batch_to_space.cpp b/src/core/src/op/batch_to_space.cpp index da2c2c5fa703a1..0b522b5156b017 100644 --- a/src/core/src/op/batch_to_space.cpp +++ b/src/core/src/op/batch_to_space.cpp @@ -2,33 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/batch_to_space.hpp" - -#include -#include -#include -#include -#include -#include -#include +#include "openvino/op/batch_to_space.hpp" +#include "batch_to_space_shape_inference.hpp" #include "itt.hpp" -#include "ngraph/builder/make_constant.hpp" -#include "ngraph/node.hpp" -#include "ngraph/opsets/opset3.hpp" -#include "ngraph/shape.hpp" #include "openvino/op/util/precision_sensitive_attribute.hpp" #include "openvino/op/util/slice_plan.hpp" #include "openvino/reference/reshape.hpp" #include "openvino/reference/strided_slice.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v1 { -ngraph::op::v1::BatchToSpace::BatchToSpace(const ngraph::Output& data, - const ngraph::Output& block_shape, - const ngraph::Output& crops_begin, - const ngraph::Output& crops_end) +BatchToSpace::BatchToSpace(const Output& data, + const Output& block_shape, + const Output& crops_begin, + const Output& crops_end) : Op({data, block_shape, crops_begin, crops_end}) { ov::mark_as_precision_sensitive(input(1)); ov::mark_as_precision_sensitive(input(2)); @@ -36,7 +26,7 @@ ngraph::op::v1::BatchToSpace::BatchToSpace(const ngraph::Output& d constructor_validate_and_infer_types(); } -void op::v1::BatchToSpace::validate_and_infer_types() { +void BatchToSpace::validate_and_infer_types() { OV_OP_SCOPE(v1_BatchToSpace_validate_and_infer_types); const auto& data_et = get_input_element_type(0); @@ -66,30 +56,29 @@ void op::v1::BatchToSpace::validate_and_infer_types() { set_output_type(0, data_et, output_shape); } -std::shared_ptr ngraph::op::v1::BatchToSpace::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr BatchToSpace::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_BatchToSpace_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); } -bool ngraph::op::v1::BatchToSpace::visit_attributes(ngraph::AttributeVisitor& visitor) { +bool BatchToSpace::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_BatchToSpace_visit_attributes); return true; } -OPENVINO_SUPPRESS_DEPRECATED_START namespace { -bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) { - auto data = inputs[0]; - const auto elem_size = data->get_element_type().size(); +bool batch_to_space_evaluate(TensorVector& outputs, const TensorVector& inputs) { + const auto& in = inputs[0]; + const auto elem_size = in.get_element_type().size(); - auto data_shape = data->get_shape(); + auto data_shape = in.get_shape(); - auto const block_values_size = shape_size(inputs[1]->get_shape()); + auto const block_values_size = shape_size(inputs[1].get_shape()); - const auto* block_values = inputs[1]->get_data_ptr(); - const auto* crops_begin_values = inputs[2]->get_data_ptr(); - const auto* crops_end_values = inputs[3]->get_data_ptr(); + const auto* block_values = inputs[1].data(); + const auto* crops_begin_values = inputs[2].data(); + const auto* crops_end_values = inputs[3].data(); ov::Shape dispersed_shape(1); dispersed_shape.insert(dispersed_shape.end(), data_shape.begin(), data_shape.end()); @@ -101,7 +90,13 @@ bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVe return false; } - auto* flat_data = data->get_data_ptr(); + auto* in_first = static_cast(in.data()); + + // Copy input tensor to not overwrite evaluate's inputs tensors passed as const. + // The evaluate algorithm should be improved to avoid additional data copy. + auto flat_in = Tensor(in.get_element_type(), data_shape); + auto* flat_data = static_cast(flat_in.data()); + std::memcpy(flat_data, in_first, flat_in.get_byte_size()); std::vector dispersed_data(shape_size(data_shape) * elem_size); ov::Shape post_transpose_shape(axes_order.size()); @@ -117,15 +112,15 @@ bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVe dispersed_shape, elem_size); - size_t val = 1; - for (size_t axis_idx = 0; axis_idx <= block_values_size; ++axis_idx) { + for (size_t axis_idx = 0, val = 1; axis_idx <= block_values_size; ++axis_idx) { if ((block_idx + 1) == axis_idx) { axes_order[axis_idx] = 0; } else { axes_order[axis_idx] = val; - val++; + ++val; } } + for (size_t axis_idx = 0; axis_idx < axes_order.size(); ++axis_idx) { post_transpose_shape[axis_idx] = dispersed_shape[axes_order[axis_idx]]; } @@ -148,61 +143,52 @@ bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVe data_shape = squeezed_shape; } - std::vector upperbounds_values(data_shape.size()); + std::vector upper_bounds_values(data_shape.size()); for (size_t i = 0; i < data_shape.size(); ++i) { - upperbounds_values[i] = data_shape[i] - crops_end_values[i]; + upper_bounds_values[i] = data_shape[i] - crops_end_values[i]; } std::vector begin_mask(data_shape.size(), 0); std::vector end_mask(data_shape.size(), 0); - std::vector begins(shape_size(inputs[2]->get_shape())); - begins.assign(crops_begin_values, crops_begin_values + shape_size(inputs[2]->get_shape())); + std::vector begins(shape_size(inputs[2].get_shape())); + begins.assign(crops_begin_values, crops_begin_values + shape_size(inputs[2].get_shape())); std::vector default_strides(begins.size(), 1); const auto slice_plan = ov::op::util::make_slice_plan(data_shape, begins, - upperbounds_values, + upper_bounds_values, default_strides, begin_mask, end_mask, AxisSet(), AxisSet(), AxisSet()); - ov::reference::strided_slice(flat_data, outputs[0]->get_data_ptr(), data_shape, slice_plan, elem_size); + ov::reference::strided_slice(flat_data, static_cast(outputs[0].data()), data_shape, slice_plan, elem_size); return true; } } // namespace -bool ngraph::op::v1::BatchToSpace::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool BatchToSpace::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_BatchToSpace_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(inputs, 4)); - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - - if (outputs[0]->get_partial_shape().is_dynamic()) { - std::vector input_shapes; - input_shapes.reserve(inputs.size()); + OPENVINO_ASSERT(outputs.size() == 1); - for (size_t i = 0; i < inputs.size(); ++i) { - input_shapes.push_back(inputs[i]->get_partial_shape()); - if (input_shapes.back().is_dynamic()) { - return false; - } - } - - const auto output_shape = shape_infer(this, input_shapes, ov::make_tensor_accessor(inputs)).front().to_shape(); - - outputs[0]->set_element_type(inputs[0]->get_element_type()); - outputs[0]->set_shape(output_shape); + std::vector input_shapes; + for (const auto& in : inputs) { + input_shapes.emplace_back(in.get_shape()); } + const auto output_shape = shape_infer(this, input_shapes, ov::make_tensor_accessor(inputs)).front().to_shape(); + outputs[0].set_shape(output_shape); + return batch_to_space_evaluate(outputs, inputs); } -bool ngraph::op::v1::BatchToSpace::has_evaluate() const { +bool BatchToSpace::has_evaluate() const { OV_OP_SCOPE(v1_BatchToSpace_has_evaluate); return !get_input_partial_shape(0).is_dynamic() && get_input_shape(0).size() >= 2 && get_input_shape(0).size() <= shape_size(get_input_shape(1)); } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/ceiling.cpp b/src/core/src/op/ceiling.cpp index 8b8f9e96f2503f..c46ed21ae03ebc 100644 --- a/src/core/src/op/ceiling.cpp +++ b/src/core/src/op/ceiling.cpp @@ -44,7 +44,7 @@ bool Ceiling::evaluate(TensorVector& outputs, const TensorVector& inputs) const outputs[0].set_shape(inputs[0].get_shape()); using namespace ov::element; - return IfTypeOf::apply( + return IfTypeOf::apply( inputs[0].get_element_type(), inputs[0], outputs[0], @@ -62,6 +62,7 @@ bool Ceiling::has_evaluate() const { case element::u16: case element::u32: case element::u64: + case element::f16: case element::f32: return true; default: diff --git a/src/core/src/op/eye.cpp b/src/core/src/op/eye.cpp index edf9abbb06f4c4..4f1ecca6d47ad7 100644 --- a/src/core/src/op/eye.cpp +++ b/src/core/src/op/eye.cpp @@ -8,7 +8,6 @@ #include "eye_shape_inference.hpp" #include "itt.hpp" #include "openvino/core/validation_util.hpp" -#include "openvino/op/util/evaluate_helpers.hpp" #include "openvino/reference/eye.hpp" namespace ov { @@ -107,7 +106,7 @@ bool Eye::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OPENVINO_ASSERT(outputs.size() == 1); // Inputs size and shapes checked by shape_infer - const auto input_shapes = util::get_tensors_partial_shapes(inputs); + const auto input_shapes = ov::util::get_tensors_partial_shapes(inputs); const auto output_shape = shape_infer(this, input_shapes, make_tensor_accessor(inputs)).front().to_shape(); int64_t diagonal_index; diff --git a/src/core/src/op/logical_and.cpp b/src/core/src/op/logical_and.cpp index d6f451715a564d..fe8bd612ed2d85 100644 --- a/src/core/src/op/logical_and.cpp +++ b/src/core/src/op/logical_and.cpp @@ -2,83 +2,53 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/logical_and.hpp" + #include "itt.hpp" -#include "ngraph/op/and.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/reference/and.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; - -op::v1::LogicalAnd::LogicalAnd(const Output& arg0, - const Output& arg1, - const AutoBroadcastSpec& auto_broadcast) +namespace ov { +namespace op { +namespace v1 { +LogicalAnd::LogicalAnd(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseLogical(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -bool op::v1::LogicalAnd::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_LogicalAnd_visit_attributes); - BinaryElementwiseLogical::visit_attributes(visitor); - return true; -} - -shared_ptr op::v1::LogicalAnd::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr LogicalAnd::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_LogicalAnd_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace logand { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::logical_and(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} - -bool evaluate_logand(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_logand, boolean, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace logand - -bool op::v1::LogicalAnd::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool LogicalAnd::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalAnd_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 2)); - OPENVINO_SUPPRESS_DEPRECATED_END - return logand::evaluate_logand(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 2); + + const auto& shape_0 = inputs[0].get_shape(); + const auto& shape_1 = inputs[1].get_shape(); + outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + + if (inputs[0].get_element_type() == element::boolean) { + using T = fundamental_type_for; + reference::logical_and(inputs[0].data(), + inputs[1].data(), + outputs[0].data(), + shape_0, + shape_1, + get_autob()); + return true; + } else { + return false; + } } -bool op::v1::LogicalAnd::has_evaluate() const { +bool LogicalAnd::has_evaluate() const { OV_OP_SCOPE(v1_LogicalAnd_has_evaluate); - switch (get_input_element_type(0)) { - case ngraph::element::boolean: - return true; - default: - break; - } - return false; + return get_input_element_type(0) == element::boolean; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/logical_not.cpp b/src/core/src/op/logical_not.cpp index 7ed4971861766a..db9f939463651a 100644 --- a/src/core/src/op/logical_not.cpp +++ b/src/core/src/op/logical_not.cpp @@ -2,22 +2,34 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/logical_not.hpp" + +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/not.hpp" -#include "ngraph/op/op.hpp" -#include "ngraph/op/util/elementwise_args.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" -#include "openvino/reference/not.hpp" +#include "openvino/reference/logical_not.hpp" + +namespace ov { +namespace op { +namespace logical_not { -using namespace ngraph; -using namespace std; +struct Evaluate : element::NoAction { + using element::NoAction::visit; -op::v1::LogicalNot::LogicalNot(const Output& arg) : Op({arg}) { + template > + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + reference::logical_not(in.data(), out.data(), count); + return true; + } +}; +} // namespace logical_not + +namespace v1 { + +LogicalNot::LogicalNot(const Output& arg) : Op({arg}) { constructor_validate_and_infer_types(); } -void op::v1::LogicalNot::validate_and_infer_types() { +void LogicalNot::validate_and_infer_types() { OV_OP_SCOPE(v1_LogicalNot_validate_and_infer_types); const auto& element_type = get_input_element_type(0); // No boolean element_type validation for backward compatibility @@ -25,64 +37,43 @@ void op::v1::LogicalNot::validate_and_infer_types() { set_output_type(0, element_type, arg_pshape); } -shared_ptr op::v1::LogicalNot::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr LogicalNot::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_LogicalNot_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace notop { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::logical_not(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; + return std::make_shared(new_args.at(0)); } -bool evaluate_not(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); +bool LogicalNot::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v1_LogicalNot_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_not, boolean, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace notop + outputs[0].set_shape(inputs[0].get_shape()); -bool op::v1::LogicalNot::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v1_LogicalNot_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return notop::evaluate_not(inputs[0], outputs[0], inputs[0]->get_element_count()); + using namespace ov::element; + return IfTypeOf::apply( + inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(inputs[0].get_shape())); } -bool op::v1::LogicalNot::has_evaluate() const { +bool LogicalNot::has_evaluate() const { OV_OP_SCOPE(v1_LogicalNot_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } + +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/logical_or.cpp b/src/core/src/op/logical_or.cpp index c473e6c12e385f..403089318de314 100644 --- a/src/core/src/op/logical_or.cpp +++ b/src/core/src/op/logical_or.cpp @@ -2,77 +2,54 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/logical_or.hpp" + #include "itt.hpp" -#include "ngraph/op/or.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/reference/or.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v1 { -op::v1::LogicalOr::LogicalOr(const Output& arg0, - const Output& arg1, - const AutoBroadcastSpec& auto_broadcast) +LogicalOr::LogicalOr(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseLogical(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::LogicalOr::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr LogicalOr::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_LogicalOr_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace logor { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::logical_or(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} - -bool evaluate_logor(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_logor, boolean, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; - } - return rc; + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -} // namespace -} // namespace logor -bool op::v1::LogicalOr::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool LogicalOr::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalOr_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 2)); - OPENVINO_SUPPRESS_DEPRECATED_END - return logor::evaluate_logor(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 2); + + const auto& shape_0 = inputs[0].get_shape(); + const auto& shape_1 = inputs[1].get_shape(); + outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + + if (inputs[0].get_element_type() == element::boolean) { + using T = fundamental_type_for; + reference::logical_or(inputs[0].data(), + inputs[1].data(), + outputs[0].data(), + shape_0, + shape_1, + get_autob()); + return true; + } else { + return false; + } } -bool op::v1::LogicalOr::has_evaluate() const { +bool LogicalOr::has_evaluate() const { OV_OP_SCOPE(v1_LogicalOr_has_evaluate); - switch (get_input_element_type(0)) { - case ngraph::element::boolean: - return true; - default: - break; - } - return false; + return get_input_element_type(0) == element::boolean; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/max_pool.cpp b/src/core/src/op/max_pool.cpp index d40c13644cd3cd..df74c1e6a105b6 100644 --- a/src/core/src/op/max_pool.cpp +++ b/src/core/src/op/max_pool.cpp @@ -2,32 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/max_pool.hpp" +#include "openvino/op/max_pool.hpp" #include "itt.hpp" #include "max_pool_shape_inference.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/op/add.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" +#include "openvino/core/attribute_visitor.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/max_pool.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v1 { -op::v1::MaxPool::MaxPool(const Output& arg, - const Strides& strides, - const ov::Shape& pads_begin, - const ov::Shape& pads_end, - const ov::Shape& kernel, - const op::RoundingType rounding_type, - const PadType auto_pad) - : op::util::MaxPoolBase(arg, strides, pads_begin, pads_end, kernel, rounding_type, auto_pad) { +MaxPool::MaxPool(const Output& arg, + const Strides& strides, + const Shape& pads_begin, + const Shape& pads_end, + const Shape& kernel, + const RoundingType rounding_type, + const PadType auto_pad) + : util::MaxPoolBase(arg, strides, pads_begin, pads_end, kernel, rounding_type, auto_pad) { constructor_validate_and_infer_types(); } -bool ngraph::op::v1::MaxPool::visit_attributes(AttributeVisitor& visitor) { +bool MaxPool::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_MaxPool_visit_attributes); visitor.on_attribute("strides", m_strides); visitor.on_attribute("pads_begin", m_pads_begin); @@ -38,7 +36,7 @@ bool ngraph::op::v1::MaxPool::visit_attributes(AttributeVisitor& visitor) { return true; } -void op::v1::MaxPool::validate_and_infer_types() { +void MaxPool::validate_and_infer_types() { OV_OP_SCOPE(v1_MaxPool_validate_and_infer_types); OPENVINO_SUPPRESS_DEPRECATED_START @@ -47,219 +45,105 @@ void op::v1::MaxPool::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), output_shapes.front()); } -shared_ptr op::v1::MaxPool::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr MaxPool::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_MaxPool_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), - m_strides, - m_pads_begin, - m_pads_end, - m_kernel, - m_rounding_type, - m_auto_pad); + return std::make_shared(new_args.at(0), + m_strides, + m_pads_begin, + m_pads_end, + m_kernel, + m_rounding_type, + m_auto_pad); } -OPENVINO_SUPPRESS_DEPRECATED_START namespace maxpool { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg, - const HostTensorPtr& out, - const ov::Shape& out_shape, - const ov::Shape& window_shape, - const Strides& window_movement_strides, - const ov::Shape& padding_below, - const ov::Shape& padding_above) { - using T = typename element_type_traits::value_type; - out->set_shape(out_shape); - ov::reference::max_pool(arg->get_data_ptr(), - out->get_data_ptr(), - arg->get_shape(), - out_shape, - window_shape, - window_movement_strides, - padding_below, - padding_above); - return true; -} - -bool evaluate_maxpool(const HostTensorPtr& arg, - const HostTensorPtr& out, - const ov::Shape& out_shape, - const ov::Shape& kernel, - const Strides& strides, - const ov::Shape& pad_begin, - const ov::Shape& pad_end) { - bool rc = true; - auto arg_shape = arg->get_shape(); +struct Evaluate : element::NoAction { + using element::NoAction::visit; - switch (out->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_maxpool, i32, arg, out, out_shape, kernel, strides, pad_begin, pad_end); - OPENVINO_TYPE_CASE(evaluate_maxpool, i64, arg, out, out_shape, kernel, strides, pad_begin, pad_end); - OPENVINO_TYPE_CASE(evaluate_maxpool, u32, arg, out, out_shape, kernel, strides, pad_begin, pad_end); - OPENVINO_TYPE_CASE(evaluate_maxpool, u64, arg, out, out_shape, kernel, strides, pad_begin, pad_end); - OPENVINO_TYPE_CASE(evaluate_maxpool, f16, arg, out, out_shape, kernel, strides, pad_begin, pad_end); - OPENVINO_TYPE_CASE(evaluate_maxpool, f32, arg, out, out_shape, kernel, strides, pad_begin, pad_end); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, + Tensor& out, + const Shape& in_shape, + const Shape& out_shape, + const Shape& kernel, + const Strides& strides, + const Shape& pads_begin, + const Shape& pads_end) { + reference::max_pool(in.data(), + out.data(), + in_shape, + out_shape, + kernel, + strides, + pads_begin, + pads_end); + return true; } - return rc; -} -} // namespace +}; } // namespace maxpool -bool op::v1::MaxPool::evaluate_maxpool(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - const auto input_shapes = std::vector{inputs[0]->get_partial_shape()}; +bool MaxPool::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v1_MaxPool_evaluate); + const auto input_shapes = std::vector{inputs[0].get_shape()}; auto pads_begin = m_pads_begin; auto pads_end = m_pads_end; - auto out_shape = shape_infer(this, input_shapes, pads_begin, pads_end).front(); + const auto output_shape = shape_infer(this, input_shapes, pads_begin, pads_end).front(); - return maxpool::evaluate_maxpool(inputs[0], - outputs[0], - out_shape.get_shape(), - get_kernel(), - get_strides(), - get_pads_begin(), - get_pads_end()); -} -bool op::v1::MaxPool::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v1_MaxPool_evaluate); - return evaluate_maxpool(outputs, inputs); + outputs[0].set_shape(output_shape.get_shape()); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + inputs[0].get_shape(), + outputs[0].get_shape(), + get_kernel(), + get_strides(), + get_pads_begin(), + get_pads_end()); } -bool op::v1::MaxPool::has_evaluate() const { +bool MaxPool::has_evaluate() const { OV_OP_SCOPE(v1_MaxPool_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov // ------------------------------ V8 ------------------------------ +namespace ov { +namespace op { +namespace v8 { -namespace maxpool_v8 { -namespace { -template -inline bool evaluate(const HostTensorPtr& data, - const HostTensorPtr& values, - const HostTensorPtr& indices, - const ov::Shape& out_shape, - const ov::Shape& kernel, - const Strides& strides, - const Strides& dilations, - const ov::Shape& pads_begin, - const ov::Shape& pads_end, - const int64_t axis) { - using Values_t = typename element_type_traits::value_type; - using Indices_t = typename element_type_traits::value_type; - ov::reference::max_pool(data->get_data_ptr(), - values->get_data_ptr(), - indices->get_data_ptr(), - data->get_shape(), - out_shape, - kernel, - strides, - dilations, - pads_begin, - pads_end, - axis); - return true; -} - -bool evaluate_maxpool(const HostTensorPtr& data, - const HostTensorPtr& values, - const HostTensorPtr& indices, - const ov::Shape& out_shape, - const ov::Shape& kernel, - const Strides& strides, - const Strides& dilations, - const ov::Shape& pads_begin, - const ov::Shape& pads_end, - const int64_t axis) { -#define EVAL_MAX_POOL_8(data_et, index_et) \ - OPENVINO_2_TYPES_CASE(maxpool_v8::evaluate_maxpool, \ - data_et, \ - index_et, \ - data, \ - values, \ - indices, \ - out_shape, \ - kernel, \ - strides, \ - dilations, \ - pads_begin, \ - pads_end, \ - axis) - - bool rc = true; - switch (indices->get_element_type()) { - case element::Type_t::i32: { - switch (data->get_element_type()) { - EVAL_MAX_POOL_8(i8, i32); - EVAL_MAX_POOL_8(i32, i32); - EVAL_MAX_POOL_8(i64, i32); - EVAL_MAX_POOL_8(u8, i32); - EVAL_MAX_POOL_8(u32, i32); - EVAL_MAX_POOL_8(u64, i32); - EVAL_MAX_POOL_8(f16, i32); - EVAL_MAX_POOL_8(f32, i32); - default: - rc = false; - break; - } - } break; - case element::Type_t::i64: { - switch (data->get_element_type()) { - EVAL_MAX_POOL_8(i8, i64); - EVAL_MAX_POOL_8(i32, i64); - EVAL_MAX_POOL_8(i64, i64); - EVAL_MAX_POOL_8(u8, i64); - EVAL_MAX_POOL_8(u32, i64); - EVAL_MAX_POOL_8(u64, i64); - EVAL_MAX_POOL_8(f16, i64); - EVAL_MAX_POOL_8(f32, i64); - default: - rc = false; - break; - } - } break; - default: - rc = false; - break; - } - - return rc; -} -} // namespace -} // namespace maxpool_v8 - -op::v8::MaxPool::MaxPool(const Output& arg, - const Strides& strides, - const Strides& dilations, - const ov::Shape& pads_begin, - const ov::Shape& pads_end, - const ov::Shape& kernel, - const op::RoundingType rounding_type, - const PadType auto_pad, - const element::Type index_element_type, - const int64_t axis) - : op::util::MaxPoolBase(arg, strides, pads_begin, pads_end, kernel, rounding_type, auto_pad), +MaxPool::MaxPool(const Output& arg, + const Strides& strides, + const Strides& dilations, + const Shape& pads_begin, + const Shape& pads_end, + const Shape& kernel, + const RoundingType rounding_type, + const PadType auto_pad, + const element::Type index_element_type, + const int64_t axis) + : util::MaxPoolBase(arg, strides, pads_begin, pads_end, kernel, rounding_type, auto_pad), m_dilations{dilations}, m_index_element_type{index_element_type}, m_axis{axis} { constructor_validate_and_infer_types(); } -bool ngraph::op::v8::MaxPool::visit_attributes(AttributeVisitor& visitor) { +bool MaxPool::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v8_MaxPool_visit_attributes); visitor.on_attribute("strides", m_strides); visitor.on_attribute("dilations", m_dilations); @@ -273,13 +157,13 @@ bool ngraph::op::v8::MaxPool::visit_attributes(AttributeVisitor& visitor) { return true; } -void op::v8::MaxPool::validate_and_infer_types() { +void MaxPool::validate_and_infer_types() { OV_OP_SCOPE(v8_MaxPool_validate_and_infer_types); const auto input_shape = get_input_partial_shape(0); if (input_shape.rank().is_static()) { OPENVINO_SUPPRESS_DEPRECATED_START - m_axis = ngraph::normalize_axis(this, m_axis, input_shape.rank()); + m_axis = normalize_axis(this, m_axis, input_shape.rank()); OPENVINO_SUPPRESS_DEPRECATED_END } @@ -290,55 +174,126 @@ void op::v8::MaxPool::validate_and_infer_types() { set_output_type(1, m_index_element_type, output_shapes[1]); } -shared_ptr op::v8::MaxPool::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr MaxPool::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v8_MaxPool_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), - m_strides, - m_dilations, - m_pads_begin, - m_pads_end, - m_kernel, - m_rounding_type, - m_auto_pad, - m_index_element_type, - m_axis); + return std::make_shared(new_args.at(0), + m_strides, + m_dilations, + m_pads_begin, + m_pads_end, + m_kernel, + m_rounding_type, + m_auto_pad, + m_index_element_type, + m_axis); } -bool op::v8::MaxPool::has_evaluate() const { - OV_OP_SCOPE(v8_MaxPool_has_evaluate); - switch (get_input_element_type(0)) { - case ngraph::element::i8: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: - return true; - default: - break; +namespace maxpool { +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& in, + Tensor& out_values, + Tensor& out_indices, + const Shape& in_shape, + const Shape& out_shape, + const Shape& kernel, + const Strides& strides, + const Strides& dilations, + const Shape& pads_begin, + const Shape& pads_end, + const int64_t axis) { + using namespace ov::element; + return IfTypeOf::apply(out_indices.get_element_type(), + in.data(), + out_values.data(), + out_indices, + in_shape, + out_shape, + kernel, + strides, + dilations, + pads_begin, + pads_end, + axis); } - return false; -} -bool op::v8::MaxPool::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +private: + struct EvalByIdxType : public element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const T* in_data, + T* out_values_data, + Tensor& out_indices, + const Shape& in_shape, + const Shape& out_shape, + const Shape& kernel, + const Strides& strides, + const Strides& dilations, + const Shape& pads_begin, + const Shape& pads_end, + const int64_t axis) { + reference::max_pool(in_data, + out_values_data, + out_indices.data(), + in_shape, + out_shape, + kernel, + strides, + dilations, + pads_begin, + pads_end, + axis); + return true; + } + }; +}; +} // namespace maxpool + +bool MaxPool::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v8_MaxPool_evaluate); - const auto input_shapes = std::vector{inputs[0]->get_partial_shape()}; + const auto input_shapes = std::vector{inputs[0].get_shape()}; auto pads_begin = m_pads_begin; auto pads_end = m_pads_end; - auto out_shape = shape_infer(this, input_shapes, pads_begin, pads_end).front(); + const auto output_shape = shape_infer(this, input_shapes, pads_begin, pads_end).front(); - return maxpool_v8::evaluate_maxpool(inputs[0], - outputs[0], - outputs[1], - out_shape.get_shape(), - get_kernel(), - get_strides(), - get_dilations(), - get_pads_begin(), - get_pads_end(), - get_axis()); + outputs[0].set_shape(output_shape.get_shape()); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + outputs[1], + inputs[0].get_shape(), + outputs[0].get_shape(), + get_kernel(), + get_strides(), + get_dilations(), + get_pads_begin(), + get_pads_end(), + get_axis()); } + +bool MaxPool::has_evaluate() const { + OV_OP_SCOPE(v8_MaxPool_has_evaluate); + switch (get_input_element_type(0)) { + case element::i8: + case element::i32: + case element::i64: + case element::u8: + case element::u32: + case element::u64: + case element::f16: + case element::f32: + return true; + default: + return false; + } +} + +} // namespace v8 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/multinomial.cpp b/src/core/src/op/multinomial.cpp index 90f41369364879..d8e6a967f38b58 100644 --- a/src/core/src/op/multinomial.cpp +++ b/src/core/src/op/multinomial.cpp @@ -6,11 +6,9 @@ #include -#include "bound_evaluate.hpp" #include "itt.hpp" #include "multinomial_shape_inference.hpp" #include "openvino/core/attribute_visitor.hpp" -#include "openvino/op/constant.hpp" #include "openvino/op/util/op_types.hpp" #include "openvino/reference/multinomial.hpp" @@ -60,7 +58,7 @@ void op::v13::Multinomial::validate_and_infer_types() { std::shared_ptr op::v13::Multinomial::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v13_Multinomial_clone_with_new_inputs); - check_new_args_count(this, new_args); + check_new_args_count(this, new_args); return std::make_shared(new_args.at(0), new_args.at(1), diff --git a/src/core/src/op/random_uniform.cpp b/src/core/src/op/random_uniform.cpp index 296b115979c8f7..b07044960f44c9 100644 --- a/src/core/src/op/random_uniform.cpp +++ b/src/core/src/op/random_uniform.cpp @@ -92,6 +92,8 @@ bool RandomUniform::evaluate(TensorVector& outputs, const TensorVector& inputs) const auto& t_out = get_out_type(); OPENVINO_ASSERT(validate::out_et(t_out), "Unsupported type of RandomUniform: " + t_out.get_type_name()); + outputs[0].set_shape(out_shape); + auto state = ov::reference::random_uniform(out_dims.data(), static_cast(inputs[1].data()), static_cast(inputs[2].data()), diff --git a/src/core/src/op/reduce_prod.cpp b/src/core/src/op/reduce_prod.cpp index 9d345ae63cf301..9d2c4dee4a8c51 100644 --- a/src/core/src/op/reduce_prod.cpp +++ b/src/core/src/op/reduce_prod.cpp @@ -7,6 +7,7 @@ #include "bound_evaluate.hpp" #include "element_visitor.hpp" #include "itt.hpp" +#include "ngraph/validation_util.hpp" #include "openvino/core/shape_util.hpp" #include "openvino/op/util/axes_util.hpp" #include "openvino/reference/reduce_prod.hpp" @@ -15,11 +16,11 @@ namespace ov { namespace op { namespace reduce_prod { namespace { -bool has_positive_bounds_on_data(const Node* const op) { +bool has_non_negative_bounds_on_data(const Node* const op) { const auto& lb = op->get_input_tensor(0).get_lower_value(); const auto& ub = op->get_input_tensor(0).get_upper_value(); - return lb && ub && tensor_is_positive(lb) && tensor_is_positive(ub); + return lb && ub && tensor_is_non_negative(lb) && tensor_is_non_negative(ub); } } // namespace @@ -78,14 +79,34 @@ bool ReduceProd::has_evaluate() const { } bool ReduceProd::evaluate_lower(ov::TensorVector& output_values) const { - return reduce_prod::has_positive_bounds_on_data(this) && get_input_tensor(1).has_and_set_bound() && + return reduce_prod::has_non_negative_bounds_on_data(this) && get_input_tensor(1).has_and_set_bound() && default_lower_bound_evaluator(this, output_values); } bool ReduceProd::evaluate_upper(ov::TensorVector& output_values) const { - return reduce_prod::has_positive_bounds_on_data(this) && get_input_tensor(1).has_and_set_bound() && - default_upper_bound_evaluator(this, output_values); + if (!reduce_prod::has_non_negative_bounds_on_data(this) || !get_input_tensor(1).has_and_set_bound()) + return false; + // We need to cover a case: if an Upper Bound comes from ShapeOf and contains + // dynamic dimension (-1) - it has a value max_of_type, which points on + // a maximum possible value. For example, Upper Bound of shape [-1, 12] is + // [max_of_type, 12]. + // In such case we shouldn't evaluate a real ReduceProd because it'll cause an + // overflow and returns wrong value. We should return an Upper Bound as for [-1], + // which will be evaluated as [max_of_type] + // In case dimensions has a zero dimension - it should return 0 in any case + if (tensor_has_max_value(get_input_tensor(0).get_upper_value()) && + !tensor_has_zero_value(get_input_tensor(0).get_upper_value())) { + OPENVINO_SUPPRESS_DEPRECATED_START + auto max_constant = ngraph::get_constant_max_of_type(get_output_element_type(0)); + OPENVINO_SUPPRESS_DEPRECATED_END + OPENVINO_ASSERT(max_constant->get_byte_size() <= output_values[0].get_byte_size()); + memcpy(output_values[0].data(), max_constant->get_data_ptr(), max_constant->get_byte_size()); + return true; + } + + return default_upper_bound_evaluator(this, output_values); } + } // namespace v1 } // namespace op } // namespace ov diff --git a/src/core/src/op/split.cpp b/src/core/src/op/split.cpp index dc2ac72b509db7..4c5563e892321c 100644 --- a/src/core/src/op/split.cpp +++ b/src/core/src/op/split.cpp @@ -2,42 +2,46 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "openvino/reference/split.hpp" +#include "openvino/op/split.hpp" #include -#include #include "bound_evaluate.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/builder/split.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/split.hpp" -#include "ngraph/op/util/op_types.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" - -using namespace std; -using namespace ngraph; - -op::v1::Split::Split(const Output& data, const Output& axis, const size_t num_splits) +#include "openvino/core/validation_util.hpp" +#include "openvino/reference/split.hpp" +#include "split_shape_inference.hpp" + +namespace ov { +namespace op { + +namespace v1 { +namespace validate { +namespace { +bool axis_type(const element::Type& et) { + return et.is_integral_number(); +} +} // namespace +} // namespace validate + +Split::Split(const Output& data, const Output& axis, const size_t num_splits) : Op({data, axis}), m_num_splits{num_splits} { constructor_validate_and_infer_types(); } -bool ngraph::op::v1::Split::visit_attributes(AttributeVisitor& visitor) { +bool Split::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_Split_visit_attributes); visitor.on_attribute("num_splits", m_num_splits); return true; } -void op::v1::Split::validate_and_infer_types() { +void Split::validate_and_infer_types() { OV_OP_SCOPE(v1_Split_validate_and_infer_types); const auto& axis_et = get_input_element_type(1); NODE_VALIDATION_CHECK(this, - axis_et.is_integral_number(), + validate::axis_type(axis_et), "Element type of 'axis' input must be integer. Got: ", axis_et); @@ -58,72 +62,70 @@ void op::v1::Split::validate_and_infer_types() { set_input_is_relevant_to_shape(0); } -shared_ptr op::v1::Split::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Split::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Split_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), m_num_splits); + return std::make_shared(new_args.at(0), new_args.at(1), m_num_splits); } -OPENVINO_SUPPRESS_DEPRECATED_START -bool op::v1::Split::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Split::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Split_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, m_num_splits) && validate_host_tensor_vector(inputs, 2)); - OPENVINO_SUPPRESS_DEPRECATED_END + OPENVINO_ASSERT(outputs.size() == m_num_splits); - if (has_evaluate()) { + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + const auto& axis_tensor = inputs[1]; + const auto result = validate::axis_type(axis_tensor.get_element_type()); + if (result) { const auto& data_tensor = inputs[0]; - const auto& axis_tensor = inputs[1]; - - const auto input_shapes = - std::vector{data_tensor->get_partial_shape(), axis_tensor->get_partial_shape()}; - - auto output_shapes = shape_infer(this, input_shapes, make_tensor_accessor(inputs)); auto outputs_data = std::vector(m_num_splits); - for (size_t i = 0; i < m_num_splits; ++i) { - outputs[i]->set_shape(output_shapes[i].get_shape()); - outputs_data[i] = outputs[i]->get_data_ptr(); + { + auto outputs_it = outputs.begin(); + auto outputs_data_it = outputs_data.begin(); + for (const auto& p_shape : output_shapes) { + outputs_it->set_shape(p_shape.get_shape()); + *outputs_data_it = static_cast(outputs_it->data()); + ++outputs_it, ++outputs_data_it; + } } - OPENVINO_SUPPRESS_DEPRECATED_START - auto axis = host_tensor_2_vector(axis_tensor)[0]; - axis = normalize_axis(this, axis, data_tensor->get_partial_shape().rank()); - OPENVINO_SUPPRESS_DEPRECATED_END + auto axis = get_tensor_data_as(axis_tensor).front(); + axis = ov::util::normalize(axis, data_tensor.get_shape().size()); - ov::reference::split(data_tensor->get_data_ptr(), - data_tensor->get_shape(), - data_tensor->get_element_type().size(), + ov::reference::split(static_cast(data_tensor.data()), + data_tensor.get_shape(), + data_tensor.get_element_type().size(), axis, m_num_splits, outputs_data.data()); - return true; } - return false; + + return result; } -OPENVINO_SUPPRESS_DEPRECATED_END -bool op::v1::Split::has_evaluate() const { +bool Split::has_evaluate() const { OV_OP_SCOPE(v1_Split_has_evaluate); - return get_input_element_type(1).is_integral_number(); + return validate::axis_type(get_input_element_type(1)); } -bool op::v1::Split::evaluate_lower(ov::TensorVector& output_values) const { +bool Split::evaluate_lower(ov::TensorVector& output_values) const { OV_OP_SCOPE(v1_Split_evaluate_lower); - - return input(1).get_tensor().has_and_set_bound() && default_lower_bound_evaluator(this, output_values); + return get_input_tensor(1).has_and_set_bound() && default_lower_bound_evaluator(this, output_values); } -bool op::v1::Split::evaluate_upper(ov::TensorVector& output_values) const { +bool Split::evaluate_upper(ov::TensorVector& output_values) const { OV_OP_SCOPE(v1_Split_evaluate_upper); - - return input(1).get_tensor().has_and_set_bound() && default_upper_bound_evaluator(this, output_values); + return get_input_tensor(1).has_and_set_bound() && default_upper_bound_evaluator(this, output_values); } -bool op::v1::Split::evaluate_label(TensorLabelVector& output_labels) const { +bool Split::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_ASSERT(output_labels.size() == get_num_splits()); OPENVINO_SUPPRESS_DEPRECATED_START - return input(1).get_tensor().has_and_set_bound() && default_label_evaluator(this, output_labels); + return get_input_tensor(1).has_and_set_bound() && default_label_evaluator(this, output_labels); OPENVINO_SUPPRESS_DEPRECATED_END } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/squeeze.cpp b/src/core/src/op/squeeze.cpp index 50bf9af02d00a5..5e47edae2c1cc9 100644 --- a/src/core/src/op/squeeze.cpp +++ b/src/core/src/op/squeeze.cpp @@ -2,34 +2,39 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/squeeze.hpp" +#include "openvino/op/squeeze.hpp" -#include -#include -#include -#include +#include #include "bound_evaluate.hpp" #include "itt.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/validation_util.hpp" -#include "openvino/reference/copy.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" #include "squeeze_shape_inference.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v0 { +namespace validate { +namespace { -op::Squeeze::Squeeze() : Op() {} +bool axes_has_and_set_bound(const Node& op) { + return (op.get_input_size() < 2) || op.get_input_tensor(1).has_and_set_bound(); +} +} // namespace +} // namespace validate + +Squeeze::Squeeze() : Op() {} -op::Squeeze::Squeeze(const Output& data, const Output& axes) : Op({data, axes}) { +Squeeze::Squeeze(const Output& data, const Output& axes) : Op({data, axes}) { constructor_validate_and_infer_types(); } -op::Squeeze::Squeeze(const Output& data) : Op({data}) { +Squeeze::Squeeze(const Output& data) : Op({data}) { constructor_validate_and_infer_types(); } -void op::Squeeze::validate_and_infer_types() { +void Squeeze::validate_and_infer_types() { OV_OP_SCOPE(v0_Squeeze_validate_and_infer_types); OPENVINO_SUPPRESS_DEPRECATED_START @@ -40,115 +45,86 @@ void op::Squeeze::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), output_shapes[0]); } -bool ngraph::op::v0::Squeeze::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Squeeze_visit_attributes); - return true; -} - -shared_ptr op::Squeeze::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Squeeze::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Squeeze_clone_with_new_inputs); check_new_args_count(this, new_args); - if (new_args.size() == 1) { - return make_shared(new_args.at(0)); - } else if (new_args.size() == 2) { - return make_shared(new_args.at(0), new_args.at(1)); - } else { + + switch (new_args.size()) { + case 1: + return std::make_shared(new_args[0]); + case 2: + return std::make_shared(new_args[0], new_args[1]); + default: OPENVINO_THROW("Incorrect number of new arguments"); } } -OPENVINO_SUPPRESS_DEPRECATED_START -bool op::v0::Squeeze::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Squeeze::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_Squeeze_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(inputs, inputs.size())); - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - - if (has_evaluate()) { - auto input_shapes = std::vector{inputs[0]->get_partial_shape()}; - - if (inputs.size() == 2) { - input_shapes.push_back(inputs[1]->get_partial_shape()); - } + OPENVINO_ASSERT(outputs.size() == 1); - auto output_shapes = shape_infer(this, input_shapes, make_tensor_accessor(inputs)); + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + outputs[0].set_shape(output_shapes.front().get_shape()); - auto out_shape = output_shapes[0].get_shape(); - outputs[0]->set_shape(out_shape); - - ov::reference::copy(inputs[0]->get_data_ptr(), - outputs[0]->get_data_ptr(), - shape_size(out_shape) * outputs[0]->get_element_type().size()); - - return true; - } - return false; + std::memcpy(outputs[0].data(), inputs[0].data(), outputs[0].get_byte_size()); + return true; } -OPENVINO_SUPPRESS_DEPRECATED_END -bool op::v0::Squeeze::has_evaluate() const { +bool Squeeze::has_evaluate() const { OV_OP_SCOPE(v0_Squeeze_has_evaluate); - - if (get_input_size() == 2) { - switch (get_input_element_type(1)) { - case ngraph::element::i8: - case ngraph::element::i16: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u16: - case ngraph::element::u32: - case ngraph::element::u64: + const auto validate_axes_type = [](const element::Type& et) -> bool { + switch (et) { + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; - } else if (get_input_size() == 1) { - return true; - } else { - return false; - } + }; + + return (get_input_size() < 2) || validate_axes_type(get_input_element_type(1)); } -bool op::v0::Squeeze::evaluate_lower(ov::TensorVector& output_values) const { +bool Squeeze::evaluate_lower(TensorVector& output_values) const { OV_OP_SCOPE(v0_Squeeze_evaluate_lower); - if (inputs().size() > 1 && !input_value(1).get_tensor().has_and_set_bound()) - return false; - return default_lower_bound_evaluator(this, output_values); + return validate::axes_has_and_set_bound(*this) && default_lower_bound_evaluator(this, output_values); } -bool op::v0::Squeeze::evaluate_upper(ov::TensorVector& output_values) const { +bool Squeeze::evaluate_upper(TensorVector& output_values) const { OV_OP_SCOPE(v0_Squeeze_evaluate_upper); - if (inputs().size() > 1 && !input_value(1).get_tensor().has_and_set_bound()) - return false; - return default_upper_bound_evaluator(this, output_values); + return validate::axes_has_and_set_bound(*this) && default_upper_bound_evaluator(this, output_values); } -bool op::v0::Squeeze::evaluate_label(TensorLabelVector& output_labels) const { - if (get_input_size() > 1 && !get_input_tensor(1).has_and_set_bound()) - return false; +bool Squeeze::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_SUPPRESS_DEPRECATED_START - return default_label_evaluator(this, output_labels); + return validate::axes_has_and_set_bound(*this) && default_label_evaluator(this, output_labels); OPENVINO_SUPPRESS_DEPRECATED_END } -bool op::v0::Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { OV_OP_SCOPE(v0_Squeeze_constant_fold); if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { return false; } - const auto& shape = get_output_shape(0); - - if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { - output_values[0] = std::make_shared(*data_const, shape); + if (auto data_const = std::dynamic_pointer_cast(inputs_values[0].get_node_shared_ptr())) { + const auto& shape = get_output_shape(0); + output_values[0] = std::make_shared(*data_const, shape); return true; } return false; } -bool op::v0::Squeeze::is_dynamic() const { +bool Squeeze::is_dynamic() const { return get_output_partial_shape(0).is_dynamic(); } +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/topk.cpp b/src/core/src/op/topk.cpp index da56c6bb7494c7..a84d0490d9bebd 100644 --- a/src/core/src/op/topk.cpp +++ b/src/core/src/op/topk.cpp @@ -4,163 +4,153 @@ #include "openvino/op/topk.hpp" -#include -#include - +#include "element_visitor.hpp" #include "itt.hpp" -#include "openvino/core/attribute_visitor.hpp" -#include "openvino/core/axis_vector.hpp" -#include "openvino/core/dimension_tracker.hpp" -#include "openvino/core/shape.hpp" #include "openvino/core/validation_util.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/util/op_types.hpp" #include "openvino/reference/topk.hpp" - -using namespace std; +#include "topk_shape_inference.hpp" namespace ov { -OPENVINO_SUPPRESS_DEPRECATED_START +namespace op { namespace topk { +namespace validate { namespace { -template -inline bool evaluate_execute(const ngraph::HostTensorPtr& arg0, - const ngraph::HostTensorPtr& out_indices, - const ngraph::HostTensorPtr& out_values, - const ov::Shape out_shape, - const size_t axis, - const size_t k, - const bool compute_max, - const op::v1::TopK::SortType sort) { - using T = typename element_type_traits::value_type; - using U = typename element_type_traits::value_type; - const ov::Shape in_shape = arg0->get_shape(); - out_indices->set_shape(out_shape); - out_indices->set_element_type(INDEX_ET); - - out_values->set_shape(out_shape); - out_values->set_element_type(arg0->get_element_type()); - - ov::reference::topk(arg0->get_data_ptr(), - out_indices->get_data_ptr(), - out_values->get_data_ptr(), - in_shape, - out_shape, - axis, - k, - compute_max, - sort); - return true; -} - -#define EXECUTE_EVALUATE_TOPK(a, ...) \ - case element::Type_t::a: { \ - OV_OP_SCOPE(OV_PP_CAT3(exec_topk_eval, _, a)); \ - rc = evaluate_execute(__VA_ARGS__); \ - } break - -template -bool evaluate(const ngraph::HostTensorPtr& arg, - const ngraph::HostTensorPtr& out_indices, - const ngraph::HostTensorPtr& out_values, - const ov::Shape out_shape, - const size_t axis, - const size_t k, - const bool max, - const op::v1::TopK::SortType sort, - const element::Type index_et) { - bool rc = true; - switch (index_et) { - EXECUTE_EVALUATE_TOPK(i32, arg, out_indices, out_values, out_shape, axis, k, max, sort); - EXECUTE_EVALUATE_TOPK(i64, arg, out_indices, out_values, out_shape, axis, k, max, sort); +bool data_type(const element::Type& et) { + switch (et) { + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: + return true; default: - rc = false; - break; + return false; } - return rc; } -bool evaluate_topk(const ngraph::HostTensorPtr& arg, - const ngraph::HostTensorPtr& out_indices, - const ngraph::HostTensorPtr& out_values, - const ov::Shape out_shape, - const size_t axis, - const size_t k, - const bool max, - const op::v1::TopK::SortType sort, - const element::Type index_et) { - bool rc = true; - switch (arg->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_topk, i32, arg, out_indices, out_values, out_shape, axis, k, max, sort, index_et); - OPENVINO_TYPE_CASE(evaluate_topk, i64, arg, out_indices, out_values, out_shape, axis, k, max, sort, index_et); - OPENVINO_TYPE_CASE(evaluate_topk, u32, arg, out_indices, out_values, out_shape, axis, k, max, sort, index_et); - OPENVINO_TYPE_CASE(evaluate_topk, u64, arg, out_indices, out_values, out_shape, axis, k, max, sort, index_et); - OPENVINO_TYPE_CASE(evaluate_topk, f16, arg, out_indices, out_values, out_shape, axis, k, max, sort, index_et); - OPENVINO_TYPE_CASE(evaluate_topk, f32, arg, out_indices, out_values, out_shape, axis, k, max, sort, index_et); +bool k_type(const element::Type& et) { + switch (et) { + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: + return true; default: - rc = false; - break; + return false; } - return rc; } -bool TopK_evaluate(const ov::op::util::TopKBase* const node, - const HostTensorVector& outputs, - const HostTensorVector& inputs) { - const auto& arg_shape = inputs[0]->get_shape(); - OPENVINO_SUPPRESS_DEPRECATED_START - const auto axis = normalize_axis(node, node->get_provided_axis(), arg_shape.size()); - OPENVINO_SUPPRESS_DEPRECATED_END - const auto compute_max = node->get_mode() == ov::op::TopKMode::MAX; - const auto sort_type = node->get_sort_type(); - - const auto input_shapes = vector{inputs[0]->get_partial_shape(), inputs[1]->get_partial_shape()}; - auto output_shape = shape_infer(node, input_shapes, ov::make_tensor_accessor(inputs)).front().to_shape(); +} // namespace +} // namespace validate + +struct Evaluate : public element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& in, + Tensor& out_values, + Tensor& out_indices, + const Shape& out_shape, + const size_t axis, + const bool compute_max, + const TopKSortType sort) { + using namespace ov::element; + return IfTypeOf::apply(out_indices.get_element_type(), + in.data(), + out_values.data(), + out_indices, + in.get_shape(), + out_shape, + axis, + out_shape[axis], + compute_max, + sort); + } + +private: + struct EvalByIdxType : public element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const T* in_first, + T* out_first, + Tensor& out_indices, + const Shape& in_shape, + const Shape& out_shape, + const size_t axis, + const size_t k, + const bool compute_max, + const TopKSortType sort) { + reference::topk(in_first, + out_indices.data(), + out_first, + in_shape, + out_shape, + axis, + k, + compute_max, + sort); + return true; + } + }; +}; + +namespace { +bool evaluate(const util::TopKBase* const node, TensorVector& outputs, const TensorVector& inputs) { + auto output_shapes = shape_infer(node, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + OPENVINO_ASSERT(outputs.size() == output_shapes.size()); + auto output_shape = output_shapes.front().get_shape(); + const auto axis = ov::util::normalize(node->get_provided_axis(), output_shape.size()); if (output_shape[axis] == 0) { // the kernel can't handle K (output_shape[axis]) equal 0, use arg_shape[axis] instead. - output_shape[axis] = arg_shape[axis]; + output_shape[axis] = inputs[0].get_shape()[axis]; } - const size_t k = output_shape[axis]; - OPENVINO_ASSERT(k <= arg_shape[axis], "'K' exceeds the dimension of top_k_axis"); - - // TopK reference implementation provides stable indices output so this parameter is not passed on - return evaluate_topk(inputs[0], - outputs[1], - outputs[0], - output_shape, - axis, - k, - compute_max, - sort_type, - node->get_index_element_type()); + for (auto& t : outputs) { + t.set_shape(output_shape); + } + + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + outputs[1], + output_shape, + axis, + (node->get_mode() == ov::op::TopKMode::MAX), + node->get_sort_type()); } } // namespace } // namespace topk // v1 version starts - -op::v1::TopK::TopK(const Output& data, - const Output& k, - const int64_t axis, - const std::string& mode, - const std::string& sort, - const element::Type& index_element_type) +namespace v1 { +TopK::TopK(const Output& data, + const Output& k, + const int64_t axis, + const std::string& mode, + const std::string& sort, + const element::Type& index_element_type) : util::TopKBase(data, k, axis, mode, sort, index_element_type) { constructor_validate_and_infer_types(); } -op::v1::TopK::TopK(const Output& data, - const Output& k, - const int64_t axis, - const Mode mode, - const SortType sort, - const element::Type& index_element_type) +TopK::TopK(const Output& data, + const Output& k, + const int64_t axis, + const Mode mode, + const SortType sort, + const element::Type& index_element_type) : util::TopKBase(data, k, axis, mode, sort, index_element_type) { constructor_validate_and_infer_types(); } -void op::v1::TopK::k_type_check(const element::Type& k_element_type) const { +void TopK::k_type_check(const element::Type& k_element_type) const { NODE_VALIDATION_CHECK( this, k_element_type == element::i8 || k_element_type == element::i32 || k_element_type == element::i64, @@ -169,156 +159,84 @@ void op::v1::TopK::k_type_check(const element::Type& k_element_type) const { ")."); } -shared_ptr op::v1::TopK::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr TopK::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_TopK_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), m_axis, m_mode, m_sort, m_index_element_type); + return std::make_shared(new_args.at(0), new_args.at(1), m_axis, m_mode, m_sort, m_index_element_type); } -bool op::v1::TopK::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool TopK::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_TopK_evaluate); - return topk::TopK_evaluate(this, outputs, inputs); + return topk::evaluate(this, outputs, inputs); } -bool op::v1::TopK::has_evaluate() const { +bool TopK::has_evaluate() const { OV_OP_SCOPE(v1_TopK_has_evaluate); - - switch (get_input_element_type(0)) { - case element::i32: - case element::i64: - case element::u32: - case element::u64: - case element::f16: - case element::f32: - break; - default: - return false; - } - - if (op::util::is_constant(input_value(1).get_node())) { - switch (get_input_element_type(1)) { - case element::i8: - case element::i32: - case element::i64: - break; - default: - return false; - } - } else { - switch (get_input_element_type(1)) { - case element::i8: - case element::i16: - case element::i32: - case element::i64: - case element::u8: - case element::u16: - case element::u32: - case element::u64: - break; - default: - return false; - } - } - - return true; + return topk::validate::data_type(get_input_element_type(0)) && topk::validate::k_type(get_input_element_type(1)); } +} // namespace v1 // v3 version starts -op::v3::TopK::TopK(const Output& data, - const Output& k, - const int64_t axis, - const std::string& mode, - const std::string& sort, - const element::Type& index_element_type) +namespace v3 { +TopK::TopK(const Output& data, + const Output& k, + const int64_t axis, + const std::string& mode, + const std::string& sort, + const element::Type& index_element_type) : TopK(data, k, axis, as_enum(mode), as_enum(sort), index_element_type) {} -op::v3::TopK::TopK(const Output& data, - const Output& k, - const int64_t axis, - const Mode mode, - const SortType sort, - const element::Type& index_element_type) +TopK::TopK(const Output& data, + const Output& k, + const int64_t axis, + const Mode mode, + const SortType sort, + const element::Type& index_element_type) : util::TopKBase{data, k, axis, mode, sort, index_element_type} { constructor_validate_and_infer_types(); } -shared_ptr op::v3::TopK::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr TopK::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v3_TopK_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), m_axis, m_mode, m_sort, m_index_element_type); + return std::make_shared(new_args.at(0), new_args.at(1), m_axis, m_mode, m_sort, m_index_element_type); } -bool op::v3::TopK::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool TopK::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v3_TopK_evaluate); - return topk::TopK_evaluate(this, outputs, inputs); + return topk::evaluate(this, outputs, inputs); } -bool op::v3::TopK::has_evaluate() const { +bool TopK::has_evaluate() const { OV_OP_SCOPE(v3_TopK_has_evaluate); - - switch (get_input_element_type(0)) { - case element::i32: - case element::i64: - case element::u32: - case element::u64: - case element::f16: - case element::f32: - break; - default: - return false; - } - - if (op::util::is_constant(input_value(1).get_node())) { - switch (get_input_element_type(1)) { - case element::i8: - case element::i32: - case element::i64: - break; - default: - return false; - } - } else { - switch (get_input_element_type(1)) { - case element::i8: - case element::i16: - case element::i32: - case element::i64: - case element::u8: - case element::u16: - case element::u32: - case element::u64: - break; - default: - return false; - } - } - - return true; + return topk::validate::data_type(get_input_element_type(0)) && topk::validate::k_type(get_input_element_type(1)); } +} // namespace v3 // =============== V11 =============== -ov::op::v11::TopK::TopK(const Output& data, - const Output& k, - const int64_t axis, - const std::string& mode, - const std::string& sort, - const element::Type& index_element_type, - const bool stable) +namespace v11 { +TopK::TopK(const Output& data, + const Output& k, + const int64_t axis, + const std::string& mode, + const std::string& sort, + const element::Type& index_element_type, + const bool stable) : TopK(data, k, axis, as_enum(mode), as_enum(sort), index_element_type, stable) {} -ov::op::v11::TopK::TopK(const Output& data, - const Output& k, - const int64_t axis, - const TopKMode mode, - const TopKSortType sort, - const element::Type& index_element_type, - const bool stable) +TopK::TopK(const Output& data, + const Output& k, + const int64_t axis, + const TopKMode mode, + const TopKSortType sort, + const element::Type& index_element_type, + const bool stable) : util::TopKBase{data, k, axis, mode, sort, index_element_type}, m_stable{stable} { constructor_validate_and_infer_types(); } -void ov::op::v11::TopK::validate_and_infer_types() { +void TopK::validate_and_infer_types() { OV_OP_SCOPE(v11_TopK_validate_and_infer_types); if (m_stable) { @@ -331,44 +249,34 @@ void ov::op::v11::TopK::validate_and_infer_types() { util::TopKBase::validate_and_infer_types(); } -bool ov::op::v11::TopK::visit_attributes(AttributeVisitor& visitor) { +bool TopK::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v11_TopK_visit_attributes); util::TopKBase::visit_attributes(visitor); visitor.on_attribute("stable", m_stable); return true; } -std::shared_ptr ov::op::v11::TopK::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr TopK::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v11_TopK_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), - new_args.at(1), - m_axis, - m_mode, - m_sort, - m_index_element_type, - m_stable); + return std::make_shared(new_args.at(0), + new_args.at(1), + m_axis, + m_mode, + m_sort, + m_index_element_type, + m_stable); } -bool ov::op::v11::TopK::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool TopK::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v11_TopK_evaluate); - return topk::TopK_evaluate(this, outputs, inputs); + return topk::evaluate(this, outputs, inputs); } -bool ov::op::v11::TopK::has_evaluate() const { +bool TopK::has_evaluate() const { OV_OP_SCOPE(v11_TopK_has_evaluate); - - switch (get_input_element_type(0)) { - case element::i32: - case element::i64: - case element::u32: - case element::u64: - case element::f16: - case element::f32: - break; - default: - return false; - } - return true; + return topk::validate::data_type(get_input_element_type(0)); } +} // namespace v11 +} // namespace op } // namespace ov diff --git a/src/core/src/op/util/evaluate_helpers.cpp b/src/core/src/op/util/evaluate_helpers.cpp index 4e21da40bfe013..cffc57e6fbd87c 100644 --- a/src/core/src/op/util/evaluate_helpers.cpp +++ b/src/core/src/op/util/evaluate_helpers.cpp @@ -4,8 +4,6 @@ #include "ngraph/op/util/evaluate_helpers.hpp" -#include "openvino/op/util/evaluate_helpers.hpp" - namespace ngraph { AxisSet get_normalized_axes_from_tensor(const HostTensorPtr tensor, const ngraph::Rank& rank, @@ -17,18 +15,3 @@ AxisSet get_normalized_axes_from_tensor(const HostTensorPtr tensor, return AxisSet{normalized_axes}; } } // namespace ngraph - -namespace ov { -namespace op { -namespace util { -std::vector get_tensors_partial_shapes(const TensorVector& tensors) { - std::vector shapes; - shapes.reserve(tensors.size()); - for (const auto& t : tensors) { - shapes.emplace_back(t.get_shape()); - } - return shapes; -} -} // namespace util -} // namespace op -} // namespace ov diff --git a/src/core/src/op/variadic_split.cpp b/src/core/src/op/variadic_split.cpp index ab94af3fd8345f..38b309325fe16a 100644 --- a/src/core/src/op/variadic_split.cpp +++ b/src/core/src/op/variadic_split.cpp @@ -2,33 +2,69 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/variadic_split.hpp" +#include "openvino/op/variadic_split.hpp" #include #include "bound_evaluate.hpp" #include "compare.hpp" #include "itt.hpp" -#include "ngraph/validation_util.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/slice.hpp" #include "variadic_split_shape_inference.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace variadic_split { +namespace { -op::v1::VariadicSplit::VariadicSplit(const Output& data, - const Output& axis, - const Output& split_lengths) - : Op({data, axis, split_lengths}) { - constructor_validate_and_infer_types(); +bool has_axis_and_splits_bound_set(const Node* const node) { + return have_node_inputs_bounds_set(node, 1, 2); } -bool ngraph::op::v1::VariadicSplit::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_VariadicSplit_visit_attributes); +bool evaluate(TensorVector& outputs, const TensorVector& inputs) { + const auto& data_tensor = inputs[0]; + const auto& axis_tensor = inputs[1]; + const auto axis = + ov::util::normalize(get_tensor_data_as(axis_tensor).front(), data_tensor.get_shape().size()); + + ov::Coordinate upper_bounds(data_tensor.get_shape()); + ov::Coordinate lower_bounds(upper_bounds.size()); + upper_bounds[axis] = 0; + + const Strides default_strides(upper_bounds.size(), 1); + constexpr auto is_zero_dim = ov::cmp::Equal(0); + + for (auto& output : outputs) { + const auto& out_shape = output.get_shape(); + upper_bounds[axis] += out_shape[axis]; + + if (std::none_of(out_shape.cbegin(), out_shape.cend(), is_zero_dim)) { + reference::slice(static_cast(data_tensor.data()), + static_cast(output.data()), + data_tensor.get_shape(), + lower_bounds, + upper_bounds, + default_strides, + out_shape, + data_tensor.get_element_type().size()); + } + + lower_bounds[axis] = upper_bounds[axis]; + } + return true; } +} // namespace +} // namespace variadic_split -void ngraph::op::v1::VariadicSplit::validate_and_infer_types() { +namespace v1 { +VariadicSplit::VariadicSplit(const Output& data, const Output& axis, const Output& split_lengths) + : Op({data, axis, split_lengths}) { + constructor_validate_and_infer_types(); +} + +void VariadicSplit::validate_and_infer_types() { OV_OP_SCOPE(v1_VariadicSplit_validate_and_infer_types); for (size_t i = 0; i < get_input_size(); ++i) { set_input_is_relevant_to_value(i); @@ -45,107 +81,52 @@ void ngraph::op::v1::VariadicSplit::validate_and_infer_types() { } } -shared_ptr op::v1::VariadicSplit::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr VariadicSplit::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_VariadicSplit_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2)); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace variadic_split { -namespace { -inline bool evaluate(const HostTensorPtr& in, - const HostTensorPtr& out, - const Coordinate& lower_bounds, - const Coordinate& upper_bounds) { - const auto& output_shape = out->get_shape(); - const auto has_nonzero_dims = std::none_of(output_shape.begin(), output_shape.end(), ov::cmp::Equal(0)); - - if (has_nonzero_dims) { - ov::reference::slice(in->get_data_ptr(), - out->get_data_ptr(), - in->get_shape(), - lower_bounds, - upper_bounds, - Strides(lower_bounds.size(), 1), - out->get_shape(), - in->get_element_type().size()); - return true; - } - return false; -} -} // namespace -} // namespace variadic_split - -bool op::v1::VariadicSplit::evaluate_variadic_split(const HostTensorVector& inputs, - const HostTensorVector& outputs) const { - const auto& data_tensor = inputs[0]; - const auto& axis_tensor = inputs[1]; - const auto& split_lengths_tensor = inputs[2]; - OPENVINO_ASSERT(axis_tensor->get_element_type().is_integral_number(), - "axis element type is not integral data type"); - OPENVINO_ASSERT(split_lengths_tensor->get_element_type().is_integral_number(), - "split_lengths element type is not integral data type"); - - OPENVINO_SUPPRESS_DEPRECATED_START - int64_t axis = host_tensor_2_vector(axis_tensor)[0]; - axis = ngraph::normalize_axis(this, axis, data_tensor->get_partial_shape().rank()); - OPENVINO_SUPPRESS_DEPRECATED_END +bool VariadicSplit::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v1_VariadicSplit_evaluate); - std::vector input_shapes = {data_tensor->get_partial_shape(), - axis_tensor->get_partial_shape(), - split_lengths_tensor->get_partial_shape()}; - auto output_shapes = shape_infer(this, input_shapes, make_tensor_accessor(inputs)); + if (inputs[1].get_element_type().is_integral_number() && inputs[2].get_element_type().is_integral_number()) { + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + OPENVINO_ASSERT(outputs.size() == output_shapes.size()); - const auto data_shape = data_tensor->get_shape(); - std::vector lower_bounds(data_shape.size(), 0); - std::vector upper_bounds = data_shape; - upper_bounds[axis] = 0; + auto out_partial_shape = output_shapes.cbegin(); + for (auto& output : outputs) { + output.set_shape(out_partial_shape->to_shape()); + ++out_partial_shape; + } - size_t split_pos = 0; - for (const auto& output : outputs) { - ov::Shape output_shape = output_shapes[split_pos++].get_shape(); - upper_bounds[axis] += output_shape[axis]; - output->set_shape(output_shape); - variadic_split::evaluate(data_tensor, output, lower_bounds, upper_bounds); - lower_bounds.at(axis) = upper_bounds.at(axis); + return variadic_split::evaluate(outputs, inputs); + } else { + return false; } - - return true; -} -bool op::v1::VariadicSplit::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v1_VariadicSplit_evaluate); - return evaluate_variadic_split(inputs, outputs); } -bool op::v1::VariadicSplit::has_evaluate() const { +bool VariadicSplit::has_evaluate() const { OV_OP_SCOPE(v1_VariadicSplit_has_evaluate); return get_input_element_type(1).is_integral_number() && get_input_element_type(2).is_integral_number(); } -bool op::v1::VariadicSplit::has_axis_and_splits_bound_set() const { - for (size_t i = 1; i < get_input_size(); ++i) { - if (!get_input_tensor(i).has_and_set_bound()) { - return false; - } - } - return true; -} - -bool op::v1::VariadicSplit::evaluate_lower(ov::TensorVector& output_values) const { +bool VariadicSplit::evaluate_lower(TensorVector& output_values) const { OV_OP_SCOPE(v1_Split_evaluate_lower); - - return has_axis_and_splits_bound_set() && default_lower_bound_evaluator(this, output_values); + return variadic_split::has_axis_and_splits_bound_set(this) && default_lower_bound_evaluator(this, output_values); } -bool op::v1::VariadicSplit::evaluate_upper(ov::TensorVector& output_values) const { +bool VariadicSplit::evaluate_upper(TensorVector& output_values) const { OV_OP_SCOPE(v1_Split_evaluate_upper); - - return has_axis_and_splits_bound_set() && default_upper_bound_evaluator(this, output_values); + return variadic_split::has_axis_and_splits_bound_set(this) && default_upper_bound_evaluator(this, output_values); } -bool op::v1::VariadicSplit::evaluate_label(TensorLabelVector& output_labels) const { +bool VariadicSplit::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_SUPPRESS_DEPRECATED_START - return has_axis_and_splits_bound_set() && default_label_evaluator(this, output_labels); + return variadic_split::has_axis_and_splits_bound_set(this) && default_label_evaluator(this, output_labels); OPENVINO_SUPPRESS_DEPRECATED_END } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 2e1db9dd6864f1..803364b289008d 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -1384,5 +1384,14 @@ std::shared_ptr get_constant_from_source(const Output& source) { return {}; } } + +std::vector get_tensors_partial_shapes(const TensorVector& tensors) { + std::vector shapes; + shapes.reserve(tensors.size()); + for (const auto& t : tensors) { + shapes.emplace_back(t.get_shape()); + } + return shapes; +} } // namespace util } // namespace ov diff --git a/src/core/tests/constant.cpp b/src/core/tests/constant.cpp index 45ad60d153627a..0feefb84bed8a5 100644 --- a/src/core/tests/constant.cpp +++ b/src/core/tests/constant.cpp @@ -266,8 +266,8 @@ TEST(constant, int4_string) { EXPECT_EQ(v[2], -1); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x10, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0x01, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); EXPECT_EQ(input, c.get_value_strings()); @@ -318,8 +318,8 @@ TEST(constant, int4_vector_negative_number) { EXPECT_EQ(v[2], int8_t(-1)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0xFE, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0xEF, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); } TEST(constant, int4_vector_positive_number) { @@ -332,8 +332,8 @@ TEST(constant, int4_vector_positive_number) { EXPECT_EQ(v[2], int8_t(5)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x12, p[0]); - EXPECT_EQ(0x50, p[1] & 0xF0); + EXPECT_EQ(0x21, p[0]); + EXPECT_EQ(0x05, p[1] & 0x0F); } TEST(constant, int4_vector_broadcast_negative_number) { @@ -795,8 +795,8 @@ TEST(constant, uint4_string) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); EXPECT_EQ(input, c.get_value_strings()); @@ -831,8 +831,8 @@ TEST(constant, uint4_vector) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); } TEST(constant, uint4_vector_broadcast) { diff --git a/src/core/tests/copy.cpp b/src/core/tests/copy.cpp index d7b2b4256f4aa3..f902d7485a1932 100644 --- a/src/core/tests/copy.cpp +++ b/src/core/tests/copy.cpp @@ -447,12 +447,12 @@ TEST(copy, random_uniform) { const auto min_val_param = make_shared(element::f32, Shape{1}); const auto max_val_param = make_shared(element::f32, Shape{1}); - auto out_shape = make_shared(element::i64, Shape{3}, std::vector{1, 2, 3}); + auto out_shape = make_shared(element::i64, Shape{3}, shape); auto ru = std::make_shared(out_shape, min_val_param, max_val_param, element::f32, 150, 10); // Call `evaluate` to update m_state - auto outputs = ov::TensorVector{{element::i64, out_shape->get_shape(), shape.data()}}; + auto outputs = ov::TensorVector{{element::i64, {1lu, 2lu, 3lu}}}; ru->evaluate(outputs, ov::TensorVector{{element::i64, out_shape->get_shape(), shape.data()}, {element::f32, min_val_param->get_shape(), &min}, diff --git a/src/core/tests/int4.cpp b/src/core/tests/int4.cpp index 2edb82dda0183c..d9a20fbf3649b2 100644 --- a/src/core/tests/int4.cpp +++ b/src/core/tests/int4.cpp @@ -15,9 +15,9 @@ TEST(int4, convert_i4_to_string) { vector values{171, 16}; auto constant = make_shared(element::i4, Shape{3}, &values[0]); - vector ref{"-6", "-5", "1"}; + vector ref{"-5", "-6", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/core/tests/type_prop/reduce_prod.cpp b/src/core/tests/type_prop/reduce_prod.cpp index 20cc6699a6b61b..4def461f847a69 100644 --- a/src/core/tests/type_prop/reduce_prod.cpp +++ b/src/core/tests/type_prop/reduce_prod.cpp @@ -11,6 +11,9 @@ using Type = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_reduce_prod, ReduceTest, Type); INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_reduce_prod_et, ReduceArithmeticTest, Type); +INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_reduce_prod_dynamic, ReduceTest, Type); +INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_reduce_prod_dynamic_zero, ReduceTest, Type); +INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_reduce_prod_scalar, ReduceTest, Type); TEST(type_prop, reduce_prod_value_propagation) { const auto param = std::make_shared(element::f32, PartialShape{{1, 8}, {2, 3}, 6}); @@ -22,3 +25,36 @@ TEST(type_prop, reduce_prod_value_propagation) { EXPECT_EQ(reshape->get_element_type(), ov::element::f32); EXPECT_EQ(reshape->get_output_partial_shape(0), (PartialShape{{12, 144}})); } + +TEST(type_prop, reduce_prod_value_propagation_dynamic) { + const auto param = std::make_shared(element::f32, PartialShape{-1, 12, 32, 32}); + const auto shape_of = std::make_shared(param); + const auto reduce_prod = + std::make_shared(shape_of, ov::op::v0::Constant::create(element::i64, {1}, {0}), true); + const auto reshape = std::make_shared(param, reduce_prod, false); + + EXPECT_EQ(reshape->get_element_type(), ov::element::f32); + EXPECT_EQ(reshape->get_output_partial_shape(0), (PartialShape{-1})); +} + +TEST(type_prop, reduce_prod_value_propagation_dynamic_zero) { + const auto param = std::make_shared(element::f32, PartialShape{-1, 12, 0, -1}); + const auto shape_of = std::make_shared(param); + const auto reduce_prod = + std::make_shared(shape_of, ov::op::v0::Constant::create(element::i64, {1}, {0}), true); + const auto reshape = std::make_shared(param, reduce_prod, false); + + EXPECT_EQ(reshape->get_element_type(), ov::element::f32); + EXPECT_EQ(reshape->get_output_partial_shape(0), (PartialShape{0})); +} + +TEST(type_prop, reduce_prod_value_propagation_scalar) { + const auto param = std::make_shared(element::f32, PartialShape{0}); + const auto shape_of = std::make_shared(param); + const auto reduce_prod = + std::make_shared(shape_of, ov::op::v0::Constant::create(element::i64, {1}, {0}), true); + const auto reshape = std::make_shared(param, reduce_prod, false); + + EXPECT_EQ(reshape->get_element_type(), ov::element::f32); + EXPECT_EQ(reshape->get_output_partial_shape(0), (PartialShape{0})); +} diff --git a/src/core/tests/uint4.cpp b/src/core/tests/uint4.cpp index 5c3b0a5e06af20..8285fdb3cd5e1c 100644 --- a/src/core/tests/uint4.cpp +++ b/src/core/tests/uint4.cpp @@ -13,9 +13,9 @@ TEST(uint4, convert_u4_to_string) { vector values{171, 16}; auto constant = make_shared(element::u4, Shape{3}, &values[0]); - vector ref{"10", "11", "1"}; + vector ref{"11", "10", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/frontends/onnx/frontend/src/op/random_uniform.cpp b/src/frontends/onnx/frontend/src/op/random_uniform.cpp index 6215dcc491c07d..a26ed672a0cc15 100644 --- a/src/frontends/onnx/frontend/src/op/random_uniform.cpp +++ b/src/frontends/onnx/frontend/src/op/random_uniform.cpp @@ -29,6 +29,7 @@ OutputVector random_uniform(const Node& node) { const auto target_type = common::get_ngraph_element_type(dtype); const uint64_t global_seed = 0; + // TODO: This multiplication leads to a mismatch in accuracy. Issue: 123003 const auto seed_uint64 = static_cast(seed * 1000); return {std::make_shared(target_shape_const, diff --git a/src/frontends/paddle/src/op/slice_ops.hpp b/src/frontends/paddle/src/op/slice_ops.hpp index dc2a609ba18513..1f8798bc00e91f 100644 --- a/src/frontends/paddle/src/op/slice_ops.hpp +++ b/src/frontends/paddle/src/op/slice_ops.hpp @@ -33,63 +33,16 @@ NamedOutputs slice_op(const NodeContext& node, const bool& stride_input) { Output start_idx_node = idx_node("StartsTensor", "StartsTensorList", "starts", node); Output end_idx_node = idx_node("EndsTensor", "EndsTensorList", "ends", node); Output strides_idx_node; - if (stride_input) - strides_idx_node = idx_node("StridesTensor", "StridesTensorList", "strides", node); - - // The following process is: - // Given: - // data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] // shape is: [2, 4] - // axes = [0] - // starts = [1] - // ends = [2] - // Our process is: - // 1. Get 'axes': [0, 1], 'starts', 'ends' - // 2. Get data shape: [2,4] and dims: 2 - // 3. Create two tensor t1 and t2, shape is the dims from step2: 2. t1: [0, 0], t2: [INT_MAX, INT_MAX] - // 4. Use 'ScatterNDUpdate' to update some elements in t1, the updated indexes are coming from 'axes', the contents - // are coming from 'starts', t1: [1, 0]; apply the similar process to t2 - // 5. Call 'StrideSlice' with t1 and t2 - // Why using ScatterNDUpdate is that 'axes' may be discontinuous. - - // the shape of input, such as [2, 4] - const auto shape_node = std::make_shared(data, element::Type_t::i32); - // the input dim, such as [2] - const auto rank_node = std::make_shared(shape_node, element::i32); - const auto const_0_node = default_opset::Constant::create(element::i32, {}, {0}); - const auto const_max_node = default_opset::Constant::create(element::i32, {}, {INT_MAX}); - const auto const_1_node = default_opset::Constant::create(element::i32, {}, {1}); - // t1: [0, 0] - const auto start_node = std::make_shared(const_0_node, rank_node); - // t2: [INT_MAX, INT_MAX] - const auto end_node = std::make_shared(const_max_node, rank_node); - const auto strides_node = std::make_shared(const_1_node, rank_node); - const auto axes_node = default_opset::Constant::create(element::i32, {axes.size(), 1}, axes); - // update t1 - const auto fixed_start_node = - std::make_shared(start_node, axes_node, start_idx_node); - // update t2 - const auto fixed_end_node = std::make_shared(end_node, axes_node, end_idx_node); - std::shared_ptr stride_slice_node; if (stride_input) { - const auto fixed_strides_node = - std::make_shared(strides_node, axes_node, strides_idx_node); - - stride_slice_node = std::make_shared(data, - fixed_start_node, - fixed_end_node, - fixed_strides_node, - std::vector{0}, - std::vector{0}); + strides_idx_node = idx_node("StridesTensor", "StridesTensorList", "strides", node); } else { - stride_slice_node = std::make_shared(data, - fixed_start_node, - fixed_end_node, - std::vector{0}, - std::vector{0}); + strides_idx_node = + default_opset::Constant::create(element::i32, start_idx_node.get_shape(), std::vector{1}); } - + const auto axes_node = default_opset::Constant::create(element::i32, {axes.size()}, axes); + const auto slice_node = + std::make_shared(data, start_idx_node, end_idx_node, strides_idx_node, axes_node); const auto decrease_axis = node.get_attribute>("decrease_axis"); - if (decrease_axis.size() > 0) { PartialShape input_shape = data.get_partial_shape(); PADDLE_OP_CHECK(node, @@ -99,18 +52,19 @@ NamedOutputs slice_op(const NodeContext& node, const bool& stride_input) { // according to paddle slice_op, when all axes are decreased, output shape is [1], instead of scalar. // Ref: paddle/fluid/operators/slice_op.h auto decreased_node = std::make_shared( - stride_slice_node, + slice_node, std::make_shared(element::i64, Shape{1}, 1), false); return node.default_single_output_mapping({decreased_node}, {"Out"}); } + const auto squeeze_index_node = default_opset::Constant::create(element::i32, {decrease_axis.size()}, decrease_axis); - const auto decreased_node = std::make_shared(stride_slice_node, squeeze_index_node); + const auto decreased_node = std::make_shared(slice_node, squeeze_index_node); return node.default_single_output_mapping({decreased_node}, {"Out"}); + } else { + return node.default_single_output_mapping({slice_node}, {"Out"}); } - - return node.default_single_output_mapping({stride_slice_node}, {"Out"}); } } // namespace } // namespace op diff --git a/src/frontends/paddle/tests/CMakeLists.txt b/src/frontends/paddle/tests/CMakeLists.txt index 82a68ce62bc206..6d373e67c0a663 100644 --- a/src/frontends/paddle/tests/CMakeLists.txt +++ b/src/frontends/paddle/tests/CMakeLists.txt @@ -4,25 +4,6 @@ set(TARGET_NAME "paddle_tests") -ov_add_test_target( - NAME ${TARGET_NAME} - ROOT ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDENCIES - paddle_test_models - openvino_paddle_frontend - paddle_fe_standalone_build_test - LINK_LIBRARIES - cnpy - frontend_shared_test_classes - openvino_paddle_frontend - openvino::runtime - gtest_main_manifest - func_test_utils - ADD_CLANG_FORMAT - # LABELS - # OV UNIT PADDLE_FE -) - # Test model generating set(PADDLE_REQ "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt") if(Python3_Interpreter_FOUND) @@ -43,6 +24,29 @@ endif() # PDPD 2.5.1 is not compatible with tests models we use set(paddlepaddle_FOUND OFF) +if(paddlepaddle_FOUND) + set(ctest_labels OV UNIT) +endif() + +ov_add_test_target( + NAME ${TARGET_NAME} + ROOT ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDENCIES + paddle_test_models + openvino_paddle_frontend + paddle_fe_standalone_build_test + LINK_LIBRARIES + cnpy + frontend_shared_test_classes + openvino_paddle_frontend + openvino::runtime + gtest_main_manifest + func_test_utils + ADD_CLANG_FORMAT + LABELS + ${ctest_labels} PADDLE_FE +) + set(TEST_PADDLE_MODELS_DIRNAME ${TEST_MODEL_ZOO}/paddle_test_models) target_compile_definitions(${TARGET_NAME} PRIVATE -D TEST_PADDLE_MODELS_DIRNAME=\"${TEST_PADDLE_MODELS_DIRNAME}/\") diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py index f89e18d7500c65..f2a6d1a8769295 100644 --- a/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py @@ -20,7 +20,7 @@ def slice(name : str, x, axes : list, start : list, end : list): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): node_x = paddle.static.data(name='x', shape=x.shape, dtype = data_type) - out = paddle.fluid.layers.slice(node_x, axes = axes, starts = start, ends = end) + out = paddle.slice(node_x, axes = axes, starts = start, ends = end) cpu = paddle.static.cpu_places(1) exe = paddle.static.Executor(cpu[0]) diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 0910aa3e057e72..14c1094b3d098f 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -41,6 +41,7 @@ #include "transforms/softmax_reshape_elimination.hpp" #include "transforms/string_equality_replacer.hpp" #include "transforms/tuple_unpack_replacer.hpp" +#include "transforms/u4_block_repack.hpp" #include "translate_session.hpp" namespace ov { @@ -200,6 +201,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/src/frontends/pytorch/src/op/full.cpp b/src/frontends/pytorch/src/op/full.cpp index cf60d096555007..e8bfa1c7ce99d7 100644 --- a/src/frontends/pytorch/src/op/full.cpp +++ b/src/frontends/pytorch/src/op/full.cpp @@ -3,10 +3,19 @@ // #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" #include "utils.hpp" namespace ov { @@ -71,12 +80,17 @@ OutputVector translate_full_like(const NodeContext& context) { return {base_translate_full_with_convertlike(context, sizes, value, out)}; }; -OutputVector translate_fill_(const NodeContext& context) { - num_inputs_check(context, 2, 2); +OutputVector translate_fill(const NodeContext& context) { + num_inputs_check(context, 2, 3); auto input = context.get_input(0); auto value = context.get_input(1); auto sizes = context.mark_node(std::make_shared(input, element::i32)); - return {base_translate_full_with_convertlike(context, sizes, value, input)}; + auto out = context.input_is_none(2) ? input : context.get_input(2); + auto result = base_translate_full_with_convertlike(context, sizes, value, out); + if (!context.input_is_none(2)) { + context.mutate_input(2, result); + } + return {result}; }; OutputVector translate_new_full(const NodeContext& context) { @@ -187,6 +201,67 @@ OutputVector translate_empty(const NodeContext& context) { } return {empty}; }; + +OutputVector translate_fill_diagonal(const NodeContext& context) { + // aten::fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!) + // realization inspired by numpy: + // https://github.com/numpy/numpy/blob/c236e694d222ae6b812cb8dab54471bc4c912f0f/numpy/lib/_index_tricks_impl.py#L787-L918 + num_inputs_check(context, 3, 3); + auto input_tensor = context.get_input(0); + auto fill_value = context.get_input(1); + auto input_shape = context.mark_node(std::make_shared(input_tensor, element::i32)); + auto input_rank = input_tensor.get_partial_shape().rank(); + auto const_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1})); + auto const_zero = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0})); + auto const_one_s = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); + auto const_zero_s = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + auto const_neg_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); + if (input_rank.is_dynamic() || input_rank.get_length() < 2) { + FRONT_END_OP_CONVERSION_CHECK(false, "aten::fill_diagonal_ required tensor with static rank >= 2 "); + } + auto flatten_input = context.mark_node(std::make_shared(input_tensor, const_neg_one, false)); + auto wrap = context.const_input(2); + Output step; + // default value for end - number of elements in input tensor + Output end; + auto flatten_shape = context.mark_node(std::make_shared(flatten_input, element::i32)); + end = context.mark_node(std::make_shared(flatten_shape, const_neg_one, const_zero)); + auto last_dim = context.mark_node(std::make_shared(input_shape, const_neg_one, const_zero)); + if (input_rank.get_length() == 2) { + // step = a.shape[1] + 1 + step = context.mark_node(std::make_shared(last_dim, const_one_s)); + if (!wrap) { + // if not wrap. and non squared matrix, do not fill tail by cutting end to square + end = context.mark_node(std::make_shared(last_dim, last_dim)); + } + } else { + // step = 1 + (cumprod(a.shape[:-1])).sum() + // cumprod operation is not supported by ov, but with condition that >2D tensors supported only if all dims + // equals cumprod can be represented as finite geometric serial and its sum can be found by formula + // b0 * (bn * q - 1) / (q - 1), where in this particual case q = b0, bn = b0 ^ n + auto rank_minus_one = + context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_rank.get_length() - 1})); + auto dim_power = context.mark_node(std::make_shared(last_dim, rank_minus_one)); + auto dim_power_minus_one = context.mark_node(std::make_shared(dim_power, const_neg_one)); + auto dim_minus_one = context.mark_node(std::make_shared(last_dim, const_neg_one)); + auto q = context.mark_node(std::make_shared(dim_power_minus_one, dim_minus_one, true)); + auto cumprod_sum = context.mark_node(std::make_shared(last_dim, q)); + step = context.mark_node(std::make_shared(const_one_s, cumprod_sum)); + // wrap parameter is not applicable in this case as supported only equal dims on pytorch side + } + step = context.mark_node(std::make_shared(step, const_zero)); + end = context.mark_node(std::make_shared(end, const_zero)); + auto indices = context.mark_node(std::make_shared(const_zero_s, end, step, element::i32)); + auto indices_shape = context.mark_node(std::make_shared(indices, element::i32)); + fill_value = context.mark_node(std::make_shared(fill_value, input_tensor)); + fill_value = context.mark_node(std::make_shared(fill_value, indices_shape)); + // fill values + auto filled_tensor = + context.mark_node(std::make_shared(flatten_input, indices, fill_value, const_zero)); + // reshape back to original shape + filled_tensor = context.mark_node(std::make_shared(filled_tensor, input_shape, false)); + return {filled_tensor}; +} } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 4254ba85130afd..5614a3881c3573 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -66,7 +66,8 @@ OP_CONVERTER(translate_expand_as); OP_CONVERTER(translate_eye); OP_CONVERTER(translate_fake_quantize_per_channel_affine); OP_CONVERTER(translate_fake_quantize_per_tensor_affine); -OP_CONVERTER(translate_fill_); +OP_CONVERTER(translate_fill); +OP_CONVERTER(translate_fill_diagonal); OP_CONVERTER(translate_flatten); OP_CONVERTER(translate_flip); OP_CONVERTER(translate_floor_divide); @@ -324,7 +325,9 @@ const std::map get_supported_ops_ts() { {"aten::fake_quantize_per_channel_affine", op::translate_fake_quantize_per_channel_affine}, {"aten::fake_quantize_per_tensor_affine", op::translate_fake_quantize_per_tensor_affine}, {"aten::feature_dropout", op::skip_node}, - {"aten::fill_", op::inplace_op}, + {"aten::fill", op::translate_fill}, + {"aten::fill_", op::inplace_op}, + {"aten::fill_diagonal_", op::inplace_op}, {"aten::flatten", op::quantizable_op}, {"aten::flip", op::translate_flip}, {"aten::floor", op::translate_1to1_match_1_inputs}, diff --git a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp index f8de5275b69ae8..67ea5f4f9e1ff9 100644 --- a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp @@ -12,6 +12,7 @@ #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "utils.hpp" +#include "utils_quantize.hpp" namespace ov { namespace frontend { @@ -38,22 +39,31 @@ AtenStackListConstructReplacer::AtenStackListConstructReplacer() { auto axis_node = pattern_map.at(axis).get_node_shared_ptr(); auto axis_const = std::dynamic_pointer_cast(axis_node); auto axis = axis_const->cast_vector(); + if (axis.size() != 1) { + add_exception_to_fw_node(stack, "aten::stack has multiple axes, only one is supported."); + return false; + } // Check if ListConstruct is an input if (auto list_construct_node = cast_fw_node(input_node, "prim::ListConstruct")) { const auto& list_inputs = list_construct_node->input_values(); - OutputVector node_vector; - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - // Iterate over values in ListConstruct - for (const auto& list_input : list_inputs) { - auto node = concat_list_construct(list_input); - auto unsqueezed_node = std::make_shared(node, axis_const); - node_vector.push_back(unsqueezed_node); + std::shared_ptr node; + if (auto compression = u4_compression_stack(list_inputs, axis[0])) { + node = compression; + } else { + OutputVector node_vector; + auto zero = v0::Constant::create(element::i32, Shape{}, {0}); + // Iterate over values in ListConstruct + for (const auto& list_input : list_inputs) { + auto node = concat_list_construct(list_input); + auto unsqueezed_node = std::make_shared(node, axis_const); + node_vector.push_back(unsqueezed_node); + } + // Concat vectors on provided axis + node = std::make_shared(node_vector, axis[0]); } - // Concat vectors on provided axis - auto concat = std::make_shared(node_vector, axis[0]); - copy_runtime_info_and_name(stack, {concat}, {input_node}); - replace_node(stack, concat); + copy_runtime_info_and_name(stack, {node}, {input_node}); + replace_node(stack, node); return true; } add_exception_to_fw_node(stack, "Unsupported case of aten::stack."); diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp new file mode 100644 index 00000000000000..e08ebd728b050e --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "u4_block_repack.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" +#include "utils_quantize.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; +using namespace ov::pass::pattern; + +U4BlockRepack::U4BlockRepack() { + const auto m_constant = ov::pass::pattern::wrap_type(); + const auto m_reshape1 = ov::pass::pattern::wrap_type({m_constant, any_input()}); + const auto m_transpose = ov::pass::pattern::wrap_type({m_reshape1, any_input()}); + const auto m_reshape2 = ov::pass::pattern::wrap_type({m_transpose, any_input()}); + + auto pack_byte = [](uint8_t lo, uint8_t hi) { + return (hi << 4) | (lo & 0x0F); + }; // swap halfs because Convert op assumes this layout + + auto get_u4 = [](const uint8_t* src, size_t idx) { + const size_t byte_idx = idx / 2; + const uint8_t bit_shift = 4 * (idx % 2); + return (src[byte_idx] >> bit_shift) & 0xF; + }; + + register_matcher( + std::make_shared(m_reshape2, "ov::frontend::pytorch::pass::U4BlockRepack"), + [=](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto constant = + std::dynamic_pointer_cast(pattern_to_output[m_constant].get_node_shared_ptr()); + if (!constant) + return false; + auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr(); + auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr(); + auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr(); + + if (constant->get_element_type() != element::u4) + return false; + + // FIXME: Check reshape/transpose/reshape target shapes and axes permutation; now they are supposed to be + // always in expected form + + auto source_shape = reshape1->get_output_shape(0); + + if (source_shape.size() != 3) + return false; + + auto destination_shape = reshape2->get_output_shape(0); + + size_t n_blocks = source_shape[0]; + size_t block_height = source_shape[1]; + size_t lane_size = source_shape[2]; // size in u4 units + size_t block_size = block_height * lane_size / 2; // size in bytes + + auto src = constant->get_data_ptr(); + + auto new_const = std::make_shared(element::u4, destination_shape); + auto dst = const_cast( // const_cast? + reinterpret_cast(new_const->get_data_ptr())); // TODO: How to better accees u4 data? + + for (size_t iblock = 0; iblock < n_blocks; ++iblock) { + auto src_block = src + iblock * block_size; + auto dst_block = dst + iblock * block_size; + for (size_t i = 0; i < lane_size; ++i) { + for (size_t j = 0; j < block_height / 2; ++j) { // /2 because we handle two bytes at once + uint8_t lo = get_u4(src_block, 2 * j * lane_size + i); + uint8_t hi = get_u4(src_block, (2 * j + 1) * lane_size + i); + dst_block[i * block_height / 2 + j] = pack_byte(lo, hi); + } + } + } + + copy_runtime_info(NodeVector{constant, reshape1, transpose, reshape2}, new_const); + replace_node(reshape2, new_const); + + return true; + }); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp new file mode 100644 index 00000000000000..aa6e00f70e564c --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class U4BlockRepack : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::U4BlockRepack"); + U4BlockRepack(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index 5af546f3d5be5d..1346fd76971fcc 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -6,6 +6,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/convert_like.hpp" #include "openvino/op/fake_quantize.hpp" @@ -13,6 +14,7 @@ #include "openvino/op/reshape.hpp" #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/subtract.hpp" +#include "transformations/utils/utils.hpp" namespace ov { namespace frontend { @@ -168,6 +170,53 @@ std::shared_ptr cast_quantized_fw_node(std::shared_ptr no return quant_node; } +std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64_t axis) { + // Part 1: Detect pattern + + if (list_elems.size() != 2) + return nullptr; + auto bitwise_and = cast_fw_node(list_elems[0].get_node_shared_ptr(), "aten::bitwise_and"); + if (!bitwise_and) + return nullptr; + auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift"); + if (!bitwise_shift) + return nullptr; + + auto weights_u8 = std::dynamic_pointer_cast(bitwise_and->get_input_node_shared_ptr(0)); + if (weights_u8 != std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0))) + return nullptr; + + if (weights_u8->get_output_element_type(0) != element::u8) + return nullptr; + + if (axis != -1 && static_cast(axis) != weights_u8->get_shape().size() - 1) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_and->get_input_node_shared_ptr(1), 0x0F)) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_shift->get_input_node_shared_ptr(1), 4)) + return nullptr; + + // Pattern detected, weights_u8 is target u8 packed constant with weights + + // Part 2: Form u4 constant by repacking of the original weights_u8 + // Repacking transformes half of lanes to interleaved representation. + + auto u8_shape = weights_u8->get_shape(); + size_t full_size = shape_size(u8_shape); + auto src = weights_u8->get_data_ptr(); + + auto u4_shape = u8_shape; + u4_shape.push_back(2); + auto new_const = std::make_shared(element::u4, u4_shape); + auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr())); + + std::copy(src, src + full_size, dst); // TODO: Avoid copying, reuse the same constant + copy_runtime_info_and_name(weights_u8, {new_const}, {weights_u8, bitwise_and, bitwise_shift}); + return new_const; +} + } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/pytorch/src/utils_quantize.hpp b/src/frontends/pytorch/src/utils_quantize.hpp index 69917e7b8bce3e..e02bce880d2480 100644 --- a/src/frontends/pytorch/src/utils_quantize.hpp +++ b/src/frontends/pytorch/src/utils_quantize.hpp @@ -166,6 +166,12 @@ OutputVector quantizable_op(const NodeContext& context) { } } // namespace op +/** + * Captures aten::stack([aten::bitwise_and(Constant(u8)), aten::bitwise_right_shift(Constant(u8))], dim=-1). + * This pattern is transformed to a single Constant with element_type=u4. + */ +std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64_t axis); + } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index fce3af3f0a235b..bc1a657faf54fb 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -130,6 +130,7 @@ const std::map get_supported_ops() { {"AvgPool3D", CreatorFunction(translate_avg_pool_op)}, {"BatchMatMul", CreatorFunction(translate_batch_mat_mul_op)}, {"BatchMatMulV2", CreatorFunction(translate_batch_mat_mul_op)}, + {"BatchMatMulV3", CreatorFunction(translate_batch_mat_mul_with_type_op)}, {"BatchToSpaceND", CreatorFunction(translate_batch_to_space_nd_op)}, {"BroadcastArgs", CreatorFunction(translate_broadcast_args_op)}, {"BroadcastTo", CreatorFunction(translate_broadcast_to_op)}, @@ -286,6 +287,8 @@ const std::map get_supported_ops() { {"While", CreatorFunction(translate_while_op)}, {"Where", CreatorFunction(translate_where_op)}, {"Xdivy", CreatorFunction(translate_x_div_y_op)}, + {"Xlog1py", CreatorFunction(translate_xlog1py_op)}, + {"Xlogy", CreatorFunction(translate_xlogy_op)}, {"ZerosLike", CreatorFunction(translate_zeros_like_op)}, // Translators for SavedModel and MetaGraph diff --git a/src/frontends/tensorflow/tests/CMakeLists.txt b/src/frontends/tensorflow/tests/CMakeLists.txt index 45e64536b60f23..a532441ac393eb 100644 --- a/src/frontends/tensorflow/tests/CMakeLists.txt +++ b/src/frontends/tensorflow/tests/CMakeLists.txt @@ -10,6 +10,15 @@ if (NOT ENABLE_INTEL_CPU) set(EXCLUDED_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/compilation.cpp) endif() +ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" + MESSAGE_MODE WARNING + WARNING_MESSAGE "TensorFlow testing models weren't generated, some tests will fail due models not found" + RESULT_VAR tensorflow_FOUND) + +if(tensorflow_FOUND) + set(ctest_labels OV UNIT) +endif() + ov_add_test_target( NAME ${TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} @@ -22,16 +31,11 @@ ov_add_test_target( openvino_tensorflow_common ADD_CLANG_FORMAT LABELS - OV UNIT TF_FE + ${ctest_labels} TF_FE ) # Test model generating -ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" - MESSAGE_MODE WARNING - WARNING_MESSAGE "TensorFlow testing models weren't generated, some tests will fail due models not found" - RESULT_VAR tensorflow_FOUND) - set(TEST_TENSORFLOW_MODELS_DIRNAME ${TEST_MODEL_ZOO}/tensorflow_test_models) target_compile_definitions(${TARGET_NAME} PRIVATE -D TEST_TENSORFLOW_MODELS_DIRNAME=\"${TEST_TENSORFLOW_MODELS_DIRNAME}/\") diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 17a865acfb0e99..54f1dff243efd1 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -38,6 +38,7 @@ OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); OP_CONVERTER(translate_avg_pool_op); OP_CONVERTER(translate_batch_mat_mul_op); +OP_CONVERTER(translate_batch_mat_mul_with_type_op); OP_CONVERTER(translate_batch_to_space_nd_op); OP_CONVERTER(translate_bias_add_op); OP_CONVERTER(translate_broadcast_args_op); @@ -149,6 +150,8 @@ OP_CONVERTER(translate_unravel_index_op); OP_CONVERTER(translate_unsorted_segment_sum_op); OP_CONVERTER(translate_where_op); OP_CONVERTER(translate_x_div_y_op); +OP_CONVERTER(translate_xlog1py_op); +OP_CONVERTER(translate_xlogy_op); OP_CONVERTER(translate_zeros_like_op); // Translators for internal operations diff --git a/src/frontends/tensorflow_common/src/op/matmul.cpp b/src/frontends/tensorflow_common/src/op/matmul.cpp index dd3aba71ddf5cd..21a0591d109b69 100644 --- a/src/frontends/tensorflow_common/src/op/matmul.cpp +++ b/src/frontends/tensorflow_common/src/op/matmul.cpp @@ -35,6 +35,26 @@ OutputVector translate_batch_mat_mul_op(const NodeContext& node) { set_node_name(node.get_name(), result); return result->outputs(); } + +OutputVector translate_batch_mat_mul_with_type_op(const NodeContext& node) { + auto x = node.get_input(0); + auto y = node.get_input(1); + + auto input_type = x.get_element_type(); + + auto adj_x = node.get_attribute("adj_x", false); + auto adj_y = node.get_attribute("adj_y", false); + auto t_out = node.get_attribute("Tout", input_type); + + auto result = make_shared(x, y, adj_x, adj_y)->output(0); + + if (t_out != input_type) { + result = make_shared(result, t_out); + } + + set_node_name(node.get_name(), result.get_node_shared_ptr()); + return {result}; +} } // namespace op } // namespace tensorflow } // namespace frontend diff --git a/src/frontends/tensorflow_common/src/op/xlog1py.cpp b/src/frontends/tensorflow_common/src/op/xlog1py.cpp new file mode 100644 index 00000000000000..cf24fec54382d3 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/xlog1py.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/log.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/select.hpp" + +using namespace std; +using namespace ov::opset10; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_xlog1py_op(const NodeContext& node) { + default_op_checks(node, 2, {"Xlog1py"}); + auto x = node.get_input(0); + auto y = node.get_input(1); + + // prepare auxiliary constants of the same type as the input + auto zero = create_same_type_const_scalar(x, 0); + auto one = create_same_type_const_scalar(y, 1); + + // compute a mask to identify where x is equal to 0 + auto is_zero = make_shared(x, zero); + + // compute x * log(y + 1) elementwise + auto xlog1py = make_shared(x, make_shared(make_shared(y, one))); + + // create the output tensor using Select to handle the x == 0 condition + auto result = make_shared(is_zero, zero, xlog_y); + + set_node_name(node.get_name(), result); + return result->outputs(); +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_lite/tests/CMakeLists.txt b/src/frontends/tensorflow_lite/tests/CMakeLists.txt index ddb2fba4d9c429..ef6ca4ee670732 100644 --- a/src/frontends/tensorflow_lite/tests/CMakeLists.txt +++ b/src/frontends/tensorflow_lite/tests/CMakeLists.txt @@ -4,6 +4,15 @@ set(TARGET_NAME "ov_tensorflow_lite_frontend_tests") +ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" + MESSAGE_MODE WARNING + WARNING_MESSAGE "TensorFlow Lite testing models weren't generated, some tests will fail due models not found" + RESULT_VAR tensorflow_FOUND) + +if(tensorflow_FOUND) + set(ctest_labels OV UNIT) +endif() + ov_add_test_target( NAME ${TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} @@ -16,16 +25,11 @@ ov_add_test_target( openvino_tensorflow_lite_frontend ADD_CLANG_FORMAT LABELS - OV UNIT TFL_FE + ${ctest_labels} TFL_FE ) # Test model generating -ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" - MESSAGE_MODE WARNING - WARNING_MESSAGE "TensorFlow Lite testing models weren't generated, some tests will fail due models not found" - RESULT_VAR tensorflow_FOUND) - set(TEST_TENSORFLOW_LITE_MODELS_DIRNAME ${TEST_MODEL_ZOO}/tensorflow_lite_test_models) target_compile_definitions(${TARGET_NAME} PRIVATE -D TEST_TENSORFLOW_LITE_MODELS_DIRNAME=\"${TEST_TENSORFLOW_LITE_MODELS_DIRNAME}/\") diff --git a/src/frontends/tests/frontend/shared/CMakeLists.txt b/src/frontends/tests/frontend/shared/CMakeLists.txt index f5d2809205db0e..f413e359afb738 100644 --- a/src/frontends/tests/frontend/shared/CMakeLists.txt +++ b/src/frontends/tests/frontend/shared/CMakeLists.txt @@ -17,8 +17,10 @@ target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../ target_link_libraries(${TARGET_NAME} PUBLIC - offline_transformations - common_test_utils func_test_utils openvino::util + openvino::offline_transformations + common_test_utils + func_test_utils + openvino::util openvino::runtime PRIVATE cnpy) diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index 3e61c370d20482..11ad85b3740d6a 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -199,8 +199,8 @@ if(WIN32) set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s) endif() -target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph - frontend_common::static inference_engine_transformations openvino::pugixml) +target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} + openvino::runtime::dev openvino::pugixml) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) @@ -215,7 +215,8 @@ set_target_properties(${TARGET_NAME}_obj # Export for developer package -openvino_developer_export_targets(COMPONENT core_legacy TARGETS ${TARGET_NAME}_plugin_api) +ov_developer_package_export_targets(TARGET ${TARGET_NAME}_s) +ov_developer_package_export_targets(TARGET ${TARGET_NAME}_plugin_api) # Install static libraries for case BUILD_SHARED_LIBS=OFF diff --git a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp index 7936130997b6a2..d8f65b086fcbe0 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp @@ -131,23 +131,25 @@ void reserve_cpu_by_streams_info(const std::vector> _streams_in } for (size_t i = 0; i < _cpu_mapping_table.size(); i++) { - std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) + - std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) + - std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]); - for (size_t j = 0; j < stream_conditions.size(); j++) { - if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) != - stream_conditions[j].end()) { - _stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]); - _cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status; - if (static_cast(_stream_processors[stream_pos[j]].size()) == - streams_table[j][THREADS_PER_STREAM]) { - stream_pos[j]++; - stream_num[j]++; - } - if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) { - stream_conditions[j].clear(); + if (_cpu_mapping_table[i][CPU_MAP_USED_FLAG] == NOT_USED) { + std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) + + std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) + + std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]); + for (size_t j = 0; j < stream_conditions.size(); j++) { + if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) != + stream_conditions[j].end()) { + _stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]); + _cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status; + if (static_cast(_stream_processors[stream_pos[j]].size()) == + streams_table[j][THREADS_PER_STREAM]) { + stream_pos[j]++; + stream_num[j]++; + } + if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) { + stream_conditions[j].clear(); + } + break; } - break; } } } diff --git a/src/inference/tests/unit/cpu_reserve_test.cpp b/src/inference/tests/unit/cpu_reserve_test.cpp index 7a5427d777bea7..e5fe6b40abdf7b 100644 --- a/src/inference/tests/unit/cpu_reserve_test.cpp +++ b/src/inference/tests/unit/cpu_reserve_test.cpp @@ -44,6 +44,12 @@ class LinuxCpuReserveTests : public ov::test::TestsCommon, test_data._cpu_status); ASSERT_EQ(test_data._stream_processors, test_processors); + for (size_t i = 0; i < test_processors.size(); i++) { + for (size_t j = 0; j < test_processors[i].size(); j++) { + ASSERT_EQ(test_data._cpu_mapping_table[test_processors[i][j]][CPU_MAP_USED_FLAG], + test_data._cpu_status); + } + } } }; @@ -869,8 +875,6 @@ LinuxCpuReserveTestCase _1socket_16cores_hyper_4streams_ecoreonly = { }, NOT_USED, }; -// streams_info_table={1, MAIN_CORE_PROC, 36}, but the number of physical cores is 18, -// in this case, threads are assigned on physical and logical cores. LinuxCpuReserveTestCase _1socket_18cores_hyper_1streams = { 36, 1, @@ -933,6 +937,38 @@ LinuxCpuReserveTestCase _1socket_18cores_hyper_2streams = { }, NOT_USED, }; +// other plugin reserved 2 threads +LinuxCpuReserveTestCase _1socket_18cores_hyper_plugin_reserve_2threads = { + 36, + 1, + {{36, 18, 0, 18, 0, 0}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, + {14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {16, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 0, 0, 9, HYPER_THREADING_PROC, 8, -1}, + {18, 0, 0, 0, MAIN_CORE_PROC, 0, CPU_USED}, {19, 0, 0, 1, MAIN_CORE_PROC, 1, CPU_USED}, + {20, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {21, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {22, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {23, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {24, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {25, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {26, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {27, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {28, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {29, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, + {30, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {31, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, + {32, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {33, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, + {34, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {35, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, + }, + {{16, MAIN_CORE_PROC, 1, 0, 0}, {16, HYPER_THREADING_PROC, 1, 0, 0}}, + { + {20}, {21}, {22}, {23}, {24}, {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}, {33}, {34}, {35}, + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, {14}, {15}, + }, + NOT_USED, +}; LinuxCpuReserveTestCase _1socket_32cores_hyper_1streams = { 32, 1, @@ -986,6 +1022,7 @@ INSTANTIATE_TEST_SUITE_P(CPUReserve, _1socket_16cores_hyper_4streams_ecoreonly, _1socket_18cores_hyper_1streams, _1socket_18cores_hyper_2streams, + _1socket_18cores_hyper_plugin_reserve_2threads, _1socket_32cores_hyper_1streams)); #endif } // namespace diff --git a/src/plugins/auto/tests/functional/CMakeLists.txt b/src/plugins/auto/tests/functional/CMakeLists.txt index cd239db8806120..b15afe68b96660 100644 --- a/src/plugins/auto/tests/functional/CMakeLists.txt +++ b/src/plugins/auto/tests/functional/CMakeLists.txt @@ -31,4 +31,4 @@ ov_add_test_target( ) target_compile_definitions(${TARGET_NAME} PRIVATE ${COMPILE_DEFINITIONS}) -set_ie_threading_interface_for(${TARGET_NAME}) \ No newline at end of file +ov_set_threading_interface_for(${TARGET_NAME}) \ No newline at end of file diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 10b82b2b75ffcb..8276d5a3188970 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -27,7 +27,7 @@ elseif(AARCH64) if(APPLE) # Apple M1 / M2 is assumed set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a) - add_definitions(-DOV_CPU_ARM_ENABLE_FP16) + add_compile_definitions(OV_CPU_ARM_ENABLE_FP16) else() set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8a) endif() diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 277bdb412563e7..90e65a15495719 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -255,10 +255,16 @@ void Config::readProperties(const std::map &prop, cons // when both execution_mode and inference_precision are specified if (!inferencePrecisionSetExplicitly) { if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { + inferencePrecision = ov::element::f32; +#if defined(OV_CPU_ARM_ENABLE_FP16) + //fp16 precision is used as default precision on ARM for non-convolution networks + //fp16 ACL convolution is slower than fp32 + if (modelType != ModelType::CNN) + inferencePrecision = ov::element::f16; +#else if (mayiuse(avx512_core_bf16)) inferencePrecision = ov::element::bf16; - else - inferencePrecision = ov::element::f32; +#endif } else { inferencePrecision = ov::element::f32; } diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 03fbe1a9923b7a..6f5a84701b184d 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -123,6 +123,7 @@ static const TypeToNameMap& get_type_to_name_tbl() { { "ScatterElementsUpdate", Type::ScatterElementsUpdate}, { "ScatterNDUpdate", Type::ScatterNDUpdate}, { "Interpolate", Type::Interpolate}, + { "RandomUniform", Type::RandomUniform}, { "ReduceL1", Type::Reduce}, { "ReduceL2", Type::Reduce}, { "ReduceLogicalAnd", Type::Reduce}, @@ -317,6 +318,7 @@ std::string NameFromType(const Type type) { CASE(PriorBox); CASE(PriorBoxClustered) CASE(MHA); + CASE(RandomUniform); CASE(Unique); CASE(Ngram); CASE(Unknown); diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 403ed62d482f8b..9afbe2d7485ddd 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -110,6 +110,7 @@ enum class Type { PriorBoxClustered, Interaction, MHA, + RandomUniform, Unique, Ngram }; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index ed18c01c848af1..5275259886ed91 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1709,7 +1709,10 @@ void Graph::EnforceInferencePrecision() { if (inferPrec == Precision::FP32) return; // nothing to do, only precision reduction is currently allowed - +#if defined(OV_CPU_ARM_ENABLE_FP16) + if (inferPrec == Precision::FP16) + return; // precision of configured by ov::pass::ConvertPrecision +#endif std::function& skipNodes)> searchForNodesToSkip; searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set& skipNodes) -> void { for (size_t i = 0; i < node->getParentEdges().size(); i++) { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index e8fe6b89a00afc..ab02ae44dd6ce2 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -95,8 +95,6 @@ Node::Node(const std::shared_ptr& op, typeStr(op->get_type_name()), type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { - const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); - for (size_t i = 0; i < op->get_input_size(); i++) { const auto &shape = op->get_input_partial_shape(i); if (shape.rank().is_dynamic()) { @@ -480,6 +478,8 @@ std::string Node::getPrimitiveDescriptorType() const { SEARCH_TYPE(_dw); SEARCH_TYPE(_1x1); +#undef SEARCH_TYPE + if (type == impl_desc_type::unknown) str_type = "unknown"; else if (str_type.empty()) diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 5becbfa9863f70..864c08a95b04c6 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -41,6 +41,8 @@ #include "nodes/executors/mvn_list.hpp" #include "nodes/executors/executor.hpp" +#define THROW_CPU_NODE_ERR(...) OPENVINO_THROW(getTypeStr(), " node with name '", getName(), "' ", __VA_ARGS__) + namespace ov { namespace intel_cpu { @@ -353,7 +355,7 @@ class Node { inplace = InPlaceType::Unknown; } - std::string getPrimitiveDescriptorType() const; + virtual std::string getPrimitiveDescriptorType() const; PerfCount &PerfCounter() { return perfCounter; } diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp index 494fb6176dd65e..0b2ee382e2a2a9 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp @@ -56,11 +56,11 @@ void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() { return; Precision inDataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); - if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16) + if (!one_of(inDataPrecision, Precision::FP32, Precision::BF16, Precision::FP16)) IE_THROW() << errorPrefix << "has unsupported 'data' input precision: " << inDataPrecision; Precision seqLenPrecision = getOriginalInputPrecisionAtPort(SEQUENCE_LENGTH_INDEX); - if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16) + if (!one_of(inDataPrecision, Precision::FP32, Precision::BF16, Precision::FP16)) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp index be695d85b8c6f2..8e6e0617cd75f2 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp @@ -55,7 +55,7 @@ void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() { return; Precision inDataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); - if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16) + if (!one_of(inDataPrecision, Precision::FP32, Precision::BF16, Precision::FP16)) IE_THROW() << errorPrefix << "has unsupported 'data' input precision: " << inDataPrecision; Precision seqLenPrecision = getOriginalInputPrecisionAtPort(SEQUENCE_LENGTH_INDEX); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index cb5911e90836fb..f22004a0d3e154 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -10,6 +10,11 @@ namespace intel_cpu { using namespace arm_compute; +static std::mutex & get_mtx_ifunc() { + static std::mutex mtx_ifunc; + return mtx_ifunc; +} + inline VectorDims reshape_sizes(VectorDims dims) { const size_t MAX_NUM_SHAPE = arm_compute::MAX_DIMS; VectorDims result_dims(MAX_NUM_SHAPE - 1); @@ -494,6 +499,11 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto default: IE_THROW() << "Unsupported operation type for ACL Eltwise executor: " << static_cast(aclEltwiseAttrs.algorithm); } + + // We get a problem (seg. faults, data race etc) for eltwise operations when we use several configure(...) functions in parallel. + // We created issue about this problem here: https://github.com/ARM-software/ComputeLibrary/issues/1073 + // TODO: change it when we will get an answer to our question in issue + std::lock_guard _lock {get_mtx_ifunc()}; ifunc = exec_func(); return true; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.cpp new file mode 100644 index 00000000000000..c617363aefebf6 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2020-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "acl_ie_scheduler.hpp" + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include + +namespace ov { +namespace intel_cpu { + +using namespace arm_compute; + +ACLScheduler::ACLScheduler() = default; + +unsigned int ACLScheduler::num_threads() const { + return parallel_get_num_threads(); +} + +void ACLScheduler::set_num_threads(unsigned int num_threads) {} + +void ACLScheduler::schedule_custom(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) { + const Window & max_window = window; + const unsigned int num_iterations = max_window.num_iterations_total(); + const auto _num_threads = std::min(num_iterations, static_cast(parallel_get_num_threads())); + + if (num_iterations == 0) { + return; + } + + std::function main_run; + if (tensors.empty()) { + main_run = [&](const Window &window, const ThreadInfo &info) { + kernel->run(window, info); + }; + } else { + main_run = [&](const Window &window, const ThreadInfo &info) { + kernel->run_op(tensors, window, info); + }; + } + + if (!kernel->is_parallelisable() || _num_threads == 1) { + ThreadInfo info; + info.cpu_info = &cpu_info(); + main_run(max_window, info); + } else { + const auto num_windows = _num_threads; + const auto hints_split_dimension = hints.split_dimension(); + + InferenceEngine::parallel_for(num_windows, [&](int wid) { + Window win = max_window.split_window(hints_split_dimension, wid, num_windows); + win.validate(); + main_run(win, {wid, static_cast(_num_threads), &cpu_info()}); + }); + } +} + +void ACLScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { + ITensorPack tensors; + schedule_custom(kernel, hints, kernel->window(), tensors); +} + +void ACLScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) { + schedule_custom(kernel, hints, window, tensors); +} + +void ACLScheduler::run_workloads(std::vector &workloads) { + InferenceEngine::parallel_for(workloads.size(), [&](int wid) { + workloads[wid]({wid, static_cast(parallel_get_num_threads()), &cpu_info()}); + }); +} + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.hpp new file mode 100644 index 00000000000000..1148f4ad5edd69 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2020-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include "support/Mutex.h" + +namespace ov { +namespace intel_cpu { + +using namespace arm_compute; + +class ACLScheduler final : public IScheduler { +public: + ACLScheduler(); + ~ACLScheduler() override = default; + std::uint32_t num_threads() const override; + void set_num_threads(unsigned int num_threads) override; + void schedule(ICPPKernel *kernel, const Hints &hints) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) override; +protected: + void run_workloads(std::vector &workloads) override; +private: + void schedule_custom(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors); +}; +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp index 72021a5c6c5812..9cd6e3c43df7b0 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp @@ -20,7 +20,8 @@ bool AclPoolingExecutor::isSupported(const TensorInfo& srcTensorInfo, DataLayout dataLayout, const VectorDims* indDims, PoolingLayerInfo* pool_info, - Pooling3dLayerInfo* pool3d_info) { + Pooling3dLayerInfo* pool3d_info, + bool ignoreOutShapeErrors) { unsigned int pad_left = (poolingAttrs.data_pad_begin.size() >= 2u) ? poolingAttrs.data_pad_begin[1] : poolingAttrs.data_pad_begin[0]; unsigned int pad_right = (poolingAttrs.data_pad_end.size() >= 2u) ? poolingAttrs.data_pad_end[1] : poolingAttrs.data_pad_end[0]; unsigned int pad_top = (poolingAttrs.data_pad_begin.size() >= 2u) ? poolingAttrs.data_pad_begin[0] : 0; @@ -46,7 +47,12 @@ bool AclPoolingExecutor::isSupported(const TensorInfo& srcTensorInfo, // The combination of parameters: NCHW + CEIL gives an accuracy problem in AvgPool. // One workaround is to disable the ACL executor for these parameters. // Then OneDNN will run this case in ACL backend as reorder -> NHWC -> reorder - if (dataLayout == arm_compute::DataLayout::NCHW && poolingAttrs.rounding == op::RoundingType::CEIL) return false; + if (pool_type == PoolingType::AVG && + dataLayout == arm_compute::DataLayout::NCHW && + poolingAttrs.rounding == op::RoundingType::CEIL) { + DEBUG_LOG("NCHW + CEIL gives an accuracy problem in ACL AvgPool. ACL executor will not be created."); + return false; + } DimensionRoundingType round = (poolingAttrs.rounding == op::RoundingType::CEIL) ? DimensionRoundingType::CEIL : DimensionRoundingType::FLOOR; @@ -82,12 +88,22 @@ bool AclPoolingExecutor::isSupported(const TensorInfo& srcTensorInfo, arm_compute::Status s = arm_compute::NEPoolingLayer::validate(&srcTensorInfo, &dstTensorInfo, *pool_info, &indTensorInfo); if (!s) { DEBUG_LOG("NEPoolingLayer validation with indices failed: ", s.error_description()); + if (ignoreOutShapeErrors && + s.error_description().find("Tensors have different shapes") != std::string::npos) { + DEBUG_LOG("Ignore shape error because the flag ignoreOutShapeErrors is set"); + return true; + } return false; } } else { arm_compute::Status s = arm_compute::NEPoolingLayer::validate(&srcTensorInfo, &dstTensorInfo, *pool_info); if (!s) { DEBUG_LOG("NEPoolingLayer validation without indices failed: ", s.error_description()); + if (ignoreOutShapeErrors && + s.error_description().find("Tensors have different shapes") != std::string::npos) { + DEBUG_LOG("Ignore shape error because the flag ignoreOutShapeErrors is set"); + return true; + } return false; } } diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp index 44a2e999057d32..2525ccb490468a 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp @@ -31,7 +31,8 @@ class AclPoolingExecutor : public PoolingExecutor { arm_compute::DataLayout dataLayout, const VectorDims* indDims, arm_compute::PoolingLayerInfo* pool_info, - arm_compute::Pooling3dLayerInfo* pool3d_info); + arm_compute::Pooling3dLayerInfo* pool3d_info, + bool ignoreOutShapeErrors = false); impl_desc_type getImplType() const override { return implType; @@ -79,8 +80,8 @@ class AclPoolingExecutorBuilder : public PoolingExecutorBuilder { if (dstDescs.size() == 2u && dstDescs[1]->getPrecision() != InferenceEngine::Precision::U32) { - DEBUG_LOG("AclPoolingExecutor does not support precisions:", - " dst[1]=", dstDescs[1]->getPrecision()); + DEBUG_LOG("AclPoolingExecutor supports U32 as indices precisions only. ", + "Passed indices precision: ", dstDescs[1]->getPrecision()); return false; } diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 798b04078352bf..6868e907fa7ae8 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -10,9 +10,11 @@ #include using namespace InferenceEngine; -using namespace dnnl::impl::cpu; using namespace ov::intel_cpu; using namespace ov::intel_cpu::node; +#if defined(OPENVINO_ARCH_X86_64) +using namespace dnnl::impl::cpu; +#endif // OPENVINO_ARCH_X86_64 #define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' " @@ -23,10 +25,14 @@ bool GridSample::isSupportedOperation(const std::shared_ptr& op, errorMessage = "Not supported GridSample operation version. CPU plug-in supports only 9th version."; return false; } +#if defined(OPENVINO_ARCH_X86_64) if (!x64::mayiuse(x64::sse41)) { errorMessage = "Not supported CPU instructions set."; return false; } +#else + return false; +#endif // OPENVINO_ARCH_X86_64 } catch (...) { return false; } @@ -34,6 +40,8 @@ bool GridSample::isSupportedOperation(const std::shared_ptr& op, return true; } +#if defined(OPENVINO_ARCH_X86_64) + GridSample::GridSample(const std::shared_ptr& op, const GraphContext::CPtr context) : Node(op, context, NgraphShapeInferFactory(op, PortMask(1))) { std::string errorMessage; @@ -110,7 +118,7 @@ void GridSample::initSupportedPrimitiveDescriptors() { } void GridSample::createPrimitive() { - GridSampleKernelConfParams jcp; + kernel::GridSampleKernelConfParams jcp; jcp.inDataPrc = dataPrecision; jcp.gridPrc = gridPrecision; @@ -133,15 +141,13 @@ void GridSample::createPrimitive() { jcp.cannelNum = jcp.dynamicChannel ? 1lu : srcDataDims[1]; } -#if defined(OPENVINO_ARCH_X86_64) if (x64::mayiuse(x64::avx512_core)) { - jitKernel.reset(new GridSampleKernel(jcp)); + jitKernel.reset(new kernel::GridSampleKernel(jcp)); } else if (x64::mayiuse(x64::avx2)) { - jitKernel.reset(new GridSampleKernel(jcp)); + jitKernel.reset(new kernel::GridSampleKernel(jcp)); } else if (x64::mayiuse(x64::sse41)) { - jitKernel.reset(new GridSampleKernel(jcp)); + jitKernel.reset(new kernel::GridSampleKernel(jcp)); } -#endif // OPENVINO_ARCH_X86_64 if (!jitKernel) { THROW_ERROR << " could not create JIT kernel."; } @@ -268,7 +274,7 @@ void GridSample::execute(dnnl::stream strm) { auto threadBody = [&](const int ithr, const int nthr) { const auto& p = execParamsPerThread[ithr]; - auto arg = GridSamplesKernelExecArgs(); + auto arg = kernel::GridSamplesKernelExecArgs(); if (p.workAmount == 0lu) { return; } @@ -311,3 +317,5 @@ void GridSample::executeDynamicImpl(dnnl::stream strm) { bool GridSample::created() const { return getType() == Type::GridSample; } + +#endif // OPENVINO_ARCH_X86_64 diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp index 89a1a409764615..78b5f9d66710ca 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp @@ -72,7 +72,7 @@ class GridSample : public Node { static constexpr size_t IN_DATA = 0; static constexpr size_t IN_GRID = 1; - std::shared_ptr jitKernel; + std::shared_ptr jitKernel; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp index 7501dd606427ce..89e658a7d6a6fc 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,12 +8,13 @@ using namespace dnnl::impl::cpu; namespace ov { namespace intel_cpu { +namespace kernel { #define GET_OFF(field) offsetof(GridSamplesKernelExecArgs, field) template GridSampleKernel::GridSampleKernel(const GridSampleKernelConfParams& jcp) : - GridSampleKernelBase(jit_name(), jcp) { + GridSampleKernelBase(jit_name(), jcp, isa) { vlen = x64::cpu_isa_traits::vlen; dataTypeSize = jcp.inDataPrc.size(); gridTypeSize = jcp.gridPrc.size(); @@ -2085,5 +2086,6 @@ template class GridSampleKernel; template class GridSampleKernel; template class GridSampleKernel; +} // namespace kernel } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp index c24100259cd5bb..295c715fb8146b 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -14,6 +14,12 @@ namespace intel_cpu { enum class GridSampleInterpolationMode { BILINEAR, BICUBIC, NEAREST }; enum class GridSamplePaddingMode { ZEROS, BORDER, REFLECTION }; +namespace kernel { + +class GridSampleKernelBase; + +#if defined(OPENVINO_ARCH_X86_64) + struct GridSampleKernelConfParams { bool dynamicShapes = false; bool dynamicBatch = false; @@ -66,7 +72,8 @@ class GridSampleKernelBase: public JitKernelBase { assert(ker_); ker_(args); } - explicit GridSampleKernelBase(const char* name, const GridSampleKernelConfParams& jcp) : JitKernelBase(name), ker_(nullptr), jcp(jcp) {} + explicit GridSampleKernelBase(const char* name, const GridSampleKernelConfParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t isa) + : JitKernelBase(name, isa), ker_(nullptr), jcp(jcp) {} virtual void create_ker() = 0; uint64_t getVecLen() { @@ -173,5 +180,8 @@ class GridSampleKernel : public GridSampleKernelBase { void hwShiftPs2dq(const Vmm& vDst, const Vmm& vHCoord, const Vmm& vWCoord, const Vmm& vWidth); }; +#endif // OPENVINO_ARCH_X86_64 + +} // namespace kernel } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp index 6afbecf143f27b..bc0daaf6e33e2a 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp @@ -1,172 +1,243 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "jit_kernel_base.hpp" -using namespace ov; -using namespace intel_cpu; using namespace dnnl::impl::cpu; +namespace ov { +namespace intel_cpu { +namespace kernel { -void JitKernelBase::uni_vfmsub132ps(const Xbyak::Xmm& vDst, - const Xbyak::Xmm& vSrc, +JitKernelBase::JitKernelBase(const char* name, x64::cpu_isa_t isa) + : x64::jit_generator(name, nullptr, x64::MAX_CODE_SIZE, true, isa), m_isa(isa) { + vlen = x64::isa_max_vlen(isa); +} + +void JitKernelBase::uni_vfmsub132ps(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vfmsub132ps(vDst, vSrc, op); + vfmsub132ps(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - assert(vDst.getIdx() != vSrc.getIdx()); - vmulps(vDst, vDst, op); - vsubps(vDst, vDst, vSrc); + assert(v_dst.getIdx() != v_src.getIdx()); + vmulps(v_dst, v_dst, op); + vsubps(v_dst, v_dst, v_src); } else { - assert(vDst.getIdx() != vSrc.getIdx()); - mulps(vDst, op); - subps(vDst, vSrc); + assert(v_dst.getIdx() != v_src.getIdx()); + mulps(v_dst, op); + subps(v_dst, v_src); } } -void JitKernelBase::uni_vfnmadd132ps(const Xbyak::Xmm& vDst, - const Xbyak::Xmm& vSrc, +void JitKernelBase::uni_vfnmadd132ps(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vfnmadd132ps(vDst, vSrc, op); + vfnmadd132ps(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - assert(vDst.getIdx() != vSrc.getIdx()); - vmulps(vDst, vDst, op); - vsubps(vDst, vSrc, vDst); + assert(v_dst.getIdx() != v_src.getIdx()); + vmulps(v_dst, v_dst, op); + vsubps(v_dst, v_src, v_dst); } else { - assert(vDst.getIdx() != vSrc.getIdx()); - mulps(vDst, op); - subps(vSrc, vDst); - movups(vDst, vSrc); + assert(v_dst.getIdx() != v_src.getIdx()); + mulps(v_dst, op); + subps(v_src, v_dst); + movups(v_dst, v_src); } } -void JitKernelBase::uni_vfmsub231ps(const Xbyak::Xmm& vDst, - const Xbyak::Xmm& vSrc, +void JitKernelBase::uni_vfmsub231ps(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vfmsub231ps(vDst, vSrc, op); + vfmsub231ps(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - assert(!vDst.isEqualIfNotInherited(op)); - vmulps(vSrc, vSrc, op); - vsubps(vDst, vSrc, vDst); + assert(!v_dst.isEqualIfNotInherited(op)); + vmulps(v_src, v_src, op); + vsubps(v_dst, v_src, v_dst); } else { - assert(!vDst.isEqualIfNotInherited(op)); - mulps(vSrc, op); - subps(vSrc, vDst); - movups(vDst, vSrc); + assert(!v_dst.isEqualIfNotInherited(op)); + mulps(v_src, op); + subps(v_src, v_dst); + movups(v_dst, v_src); } } -void JitKernelBase::uni_vpaddd(const Xbyak::Ymm& vDst, - const Xbyak::Ymm& vSrc, +void JitKernelBase::uni_vpaddd(const Xbyak::Ymm& v_dst, + const Xbyak::Ymm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vpaddd(vDst, vSrc, op); + vpaddd(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - Xbyak::Xmm xmmDst(vDst.getIdx()); - vmovups(vDst, vSrc); + Xbyak::Xmm xmmDst(v_dst.getIdx()); + vmovups(v_dst, v_src); if (op.isYMM()) { Xbyak::Ymm ymmOp(op.getIdx()); Xbyak::Xmm xmmOp(op.getIdx()); paddd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); paddd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); } else if (op.isMEM()) { const int vlen = x64::cpu_isa_traits::vlen; paddd(xmmDst, op.getAddress()); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); paddd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); } else { IE_THROW() << "Not supported operand type."; } } else if (isValidIsa(x64::sse41)) { - assert(vDst.getIdx() != vSrc.getIdx()); - paddd(vDst, op); + assert(v_dst.getIdx() != v_src.getIdx()); + paddd(v_dst, op); } else { IE_THROW() << "Not defined behavior for instruction 'vpaddd' in current instructions set."; } } -void JitKernelBase::uni_vpsubd(const Xbyak::Ymm& vDst, - const Xbyak::Ymm& vSrc, +void JitKernelBase::uni_vpaddq(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx2)) { + vpaddq(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + paddq(v_dst, op); + } +} + +void JitKernelBase::uni_vpsubd(const Xbyak::Ymm& v_dst, + const Xbyak::Ymm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vpsubd(vDst, vSrc, op); + vpsubd(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - Xbyak::Xmm xmmDst(vDst.getIdx()); - vmovups(vDst, vSrc); + Xbyak::Xmm xmmDst(v_dst.getIdx()); + vmovups(v_dst, v_src); if (op.isYMM()) { Xbyak::Ymm ymmOp(op.getIdx()); Xbyak::Xmm xmmOp(op.getIdx()); psubd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); psubd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); } else if (op.isMEM()) { const int vlen = x64::cpu_isa_traits::vlen; psubd(xmmDst, op.getAddress()); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); psubd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); } else { IE_THROW() << "Not supported operand type."; } } else if (isValidIsa(x64::sse41)) { - assert(vDst.getIdx() != vSrc.getIdx()); - psubd(vDst, op); + assert(v_dst.getIdx() != v_src.getIdx()); + psubd(v_dst, op); } else { IE_THROW() << "Not defined behavior for instruction 'vpsubd' in current instructions set."; } } -void JitKernelBase::uni_vdivps(const Xbyak::Xmm& vDst, +void JitKernelBase::uni_vsubpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vsubpd(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + subpd(v_dst, op); + } +} + +void JitKernelBase::uni_vmulpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vmulpd(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + mulpd(v_dst, op); + } +} + +void JitKernelBase::uni_vpmuludq(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx2)) { + vpmuludq(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + pmuludq(v_dst, op); + } +} + +void JitKernelBase::uni_vdivps(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op1, const Xbyak::Operand& op2) { if (isValidIsa(x64::avx)) { - vdivps(vDst, op1, op2); + vdivps(v_dst, op1, op2); + } else { + if (!v_dst.isEqualIfNotInherited(op1)) { + movups(v_dst, op1); + } + divps(v_dst, op2); + } +} + +void JitKernelBase::uni_vdivpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vdivpd(v_dst, v_src, op); } else { - if (!vDst.isEqualIfNotInherited(op1)) { - movups(vDst, op1); + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); } - divps(vDst, op2); + divpd(v_dst, op); } } -void JitKernelBase::uni_vandps(const Xbyak::Xmm& vDst, +void JitKernelBase::uni_vandps(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op) { if (isValidIsa(x64::avx)) { - vandps(vDst, vSrs, op); + vandps(v_dst, vSrs, op); } else { - if (!vDst.isEqualIfNotInherited(vSrs)) { - movups(vDst, vSrs); + if (!v_dst.isEqualIfNotInherited(vSrs)) { + movups(v_dst, vSrs); } - andps(vDst, op); + andps(v_dst, op); } } -void JitKernelBase::uni_vandnps(const Xbyak::Xmm& vDst, +void JitKernelBase::uni_vandnps(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op) { if (isValidIsa(x64::avx)) { - vandnps(vDst, vSrs, op); + vandnps(v_dst, vSrs, op); } else { - if (!vDst.isEqualIfNotInherited(vSrs)) { - movups(vDst, vSrs); + if (!v_dst.isEqualIfNotInherited(vSrs)) { + movups(v_dst, vSrs); } - andnps(vDst, op); + andnps(v_dst, op); } } -void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, +void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Xmm& vSrcShift, const Xbyak::Opmask& kReadMask, @@ -178,28 +249,28 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, if (!useMask) kxnord(kReadMask, kReadMask, kReadMask); if (zeroFill) - uni_vpxor(vDst, vDst, vDst); + uni_vpxor(v_dst, v_dst, v_dst); - vpgatherdd(vDst | kReadMask, ptr[rSrcPtr + vSrcShift]); + vpgatherdd(v_dst | kReadMask, ptr[rSrcPtr + vSrcShift]); } -void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, +void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Xmm& vSrcShift, const Xbyak::Xmm& vReadMask, const bool useMask, const bool zeroFill) { - if (vDst.getIdx() == vSrcShift.getIdx() || vDst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { + if (v_dst.getIdx() == vSrcShift.getIdx() || v_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same."; } if (zeroFill) - pxor(vDst, vDst); // Don't use vpxor. It zeros the rest of the YMM register. + pxor(v_dst, v_dst); // Don't use vpxor. It zeros the rest of the YMM register. if (isValidIsa(x64::avx2)) { if (!useMask) uni_vpcmpeqd(vReadMask, vReadMask, vReadMask); - vpgatherdd(vDst, ptr[rSrcPtr + vSrcShift], vReadMask); + vpgatherdd(v_dst, ptr[rSrcPtr + vSrcShift], vReadMask); } else { auto rAux = getReg64(); Xbyak::Reg32 r32Aux = Xbyak::Reg32(rAux.getIdx()); @@ -213,7 +284,7 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, je(lLoopNext, T_NEAR); } uni_vpextrd(r32Aux, vSrcShift, i); - pinsrd(vDst, ptr[rSrcPtr + rAux], i); + pinsrd(v_dst, ptr[rSrcPtr + rAux], i); if (useMask) L(lLoopNext); @@ -221,30 +292,30 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, } } -void JitKernelBase::gatherdd(const Xbyak::Ymm& vDst, +void JitKernelBase::gatherdd(const Xbyak::Ymm& v_dst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Ymm& vSrcShift, const Xbyak::Ymm& vReadMask, const bool useMask, const bool zeroFill) { - if (vDst.getIdx() == vSrcShift.getIdx() || vDst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { + if (v_dst.getIdx() == vSrcShift.getIdx() || v_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same."; } if (isValidIsa(x64::avx2)) { if (!useMask) uni_vpcmpeqd(vReadMask, vReadMask, vReadMask); if (zeroFill) - uni_vpxor(vDst, vDst, vDst); + uni_vpxor(v_dst, v_dst, v_dst); - vpgatherdd(vDst, ptr[rSrcPtr + vSrcShift], vReadMask); + vpgatherdd(v_dst, ptr[rSrcPtr + vSrcShift], vReadMask); } else { - Xbyak::Xmm xmmDst = Xbyak::Xmm(vDst.getIdx()), + Xbyak::Xmm xmmDst = Xbyak::Xmm(v_dst.getIdx()), xmmSrcShft = Xbyak::Xmm(vSrcShift.getIdx()), xmmReadMask = Xbyak::Xmm(vReadMask.getIdx()); for (uint8_t i = 0; i < 2; i++) { gatherdd(xmmDst, rSrcPtr, xmmSrcShft, xmmReadMask, useMask, zeroFill); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(vSrcShift, vSrcShift, vSrcShift, 0x1); if (useMask) vperm2f128(vReadMask, vReadMask, vReadMask, 0x1); @@ -252,6 +323,15 @@ void JitKernelBase::gatherdd(const Xbyak::Ymm& vDst, } } +void JitKernelBase::uni_vpbroadcastq(const Xbyak::Xmm &x, const Xbyak::Operand &op) { + if (isValidIsa(x64::avx2)) { + vpbroadcastq(x, op); + } else { + movsd(x, op); + shufpd(x, x, 0x0); + } +} + void JitKernelBase::uni_vpbroadcastd(const Xbyak::Xmm &x, const Xbyak::Operand &op) { if (isValidIsa(x64::avx2)) { vpbroadcastd(x, op); @@ -285,6 +365,57 @@ void JitKernelBase::uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand & } } +void JitKernelBase::uni_vroundpd(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op, const uint8_t imm) { + if (isValidIsa(x64::avx512_core)) { + vrndscalepd(v_dst, op, imm & 0x3); + } else if (isValidIsa(x64::avx)) { + vroundpd(v_dst, op, imm); + } else { + roundpd(v_dst, op, imm); + } +} + +void JitKernelBase::uni_vcvtdq2pd(const Xbyak::Xmm& v_dst, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vcvtdq2pd(v_dst, op); + } else { + cvtdq2pd(v_dst, op); + } +} + +void JitKernelBase::uni_vcvtpd2dq(const Xbyak::Xmm& v_dst, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vcvtpd2dq(v_dst, op); + } else { + cvtpd2dq(v_dst, op); + } +} + +void JitKernelBase::uni_vpmovzxdq(const Xbyak::Xmm& v_dst, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx2)) { + vpmovzxdq(v_dst, op); + } else { + pmovzxdq(v_dst, op); + } +} + +void JitKernelBase::uni_vshufpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op, + uint8_t imm) { + if (isValidIsa(x64::avx)) { + vshufpd(v_dst, v_src, op, imm); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + shufpd(v_dst, op, imm); + } +} + void JitKernelBase::fillRestWorkMask(const Xbyak::Opmask& dstMask, const Xbyak::Reg64& rWorkRest) { auto rOnes = getReg64(); @@ -362,7 +493,7 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Ymm& ymmDstMask, L(lEnd); } -void JitKernelBase::load(const Xbyak::Xmm& vDst, +void JitKernelBase::load(const Xbyak::Xmm& v_dst, const Xbyak::Address& srcAddr, const Xbyak::Reg64& rLoadNum, const size_t typeSize, @@ -373,7 +504,7 @@ void JitKernelBase::load(const Xbyak::Xmm& vDst, const uint8_t elPerVec = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; if (zeroFilling) - pxor(vDst, vDst); + pxor(v_dst, v_dst); for (uint8_t i = 0; i < elPerVec; i++) { cmp(rLoadNum, i); @@ -381,18 +512,18 @@ void JitKernelBase::load(const Xbyak::Xmm& vDst, const size_t offset = i * typeSize; if (typeSize == 1) - pinsrb(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrb(v_dst, ptr[srcAddr.getRegExp() + offset], i); else if (typeSize == 2) - pinsrw(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrw(v_dst, ptr[srcAddr.getRegExp() + offset], i); else if (typeSize == 4) - pinsrd(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrd(v_dst, ptr[srcAddr.getRegExp() + offset], i); else if (typeSize == 8) - pinsrq(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrq(v_dst, ptr[srcAddr.getRegExp() + offset], i); } L(lEnd); } -void JitKernelBase::load(const Xbyak::Ymm& vDst, +void JitKernelBase::load(const Xbyak::Ymm& v_dst, const Xbyak::Address& srcAddr, const Xbyak::Reg64& rLoadNum, const size_t typeSize, @@ -403,8 +534,8 @@ void JitKernelBase::load(const Xbyak::Ymm& vDst, const size_t elPerXmm = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; if (zeroFilling) - uni_vpxor(vDst, vDst, vDst); - Xbyak::Xmm xmmDst(vDst.getIdx()); + uni_vpxor(v_dst, v_dst, v_dst); + Xbyak::Xmm xmmDst(v_dst.getIdx()); for (size_t i = 0lu; i < 2lu; i++) { Xbyak::Label lPerm; @@ -427,13 +558,13 @@ void JitKernelBase::load(const Xbyak::Ymm& vDst, } L(lPerm); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); } L(lEnd); } void JitKernelBase::store(const Xbyak::Address& dstAddr, - const Xbyak::Xmm& vSrc, + const Xbyak::Xmm& v_src, const Xbyak::Reg64& rToStoreNum, const size_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { @@ -448,27 +579,27 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr, const size_t offset = i * typeSize; if (typeSize == 1) { - uni_vpextrb(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrb(ptr[dstAddr.getRegExp() + offset], v_src, i); } else if (typeSize == 2) { - uni_vpextrw(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrw(ptr[dstAddr.getRegExp() + offset], v_src, i); } else if (typeSize == 4) { - uni_vpextrd(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrd(ptr[dstAddr.getRegExp() + offset], v_src, i); } else if (typeSize == 8) { - uni_vpextrq(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrq(ptr[dstAddr.getRegExp() + offset], v_src, i); } } L(lEnd); } void JitKernelBase::store(const Xbyak::Address& dstAddr, - const Xbyak::Ymm& vSrc, + const Xbyak::Ymm& v_src, const Xbyak::Reg64& rToStoreNum, const size_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { IE_THROW() << "Could not store data with type size " << typeSize; } Xbyak::Label lEnd; - Xbyak::Xmm xmmSrc(vSrc.getIdx()); + Xbyak::Xmm xmmSrc(v_src.getIdx()); const size_t elPerXmm = x64::cpu_isa_traits::vlen / typeSize; for (int i = 0; i < 2; i++) { @@ -493,7 +624,7 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr, } L(lPerm); - vperm2f128(vSrc, vSrc, vSrc, 0x1); + vperm2f128(v_src, v_src, v_src, 0x1); } L(lEnd); } @@ -575,3 +706,7 @@ void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst, } L(lEnd); } + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp index e39efde753bbbc..f17eb9a02d8771 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp @@ -1,14 +1,23 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/core/visibility.hpp" + +#if defined(OPENVINO_ARCH_X86_64) #include "cpu/x64/jit_generator.hpp" #include "registers_pool.hpp" +#endif // OPENVINO_ARCH_X86_64 namespace ov { namespace intel_cpu { +namespace kernel { + +class JitKernelBase; + +#if defined(OPENVINO_ARCH_X86_64) #define getReg64() RegistersPool::Reg(registersPool) #define getReg32() RegistersPool::Reg(registersPool) @@ -17,7 +26,11 @@ namespace intel_cpu { class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { public: - JitKernelBase(const char* name) : dnnl::impl::cpu::x64::jit_generator(name) {} + JitKernelBase(const char* name, dnnl::impl::cpu::x64::cpu_isa_t max_cpu_isa); + + dnnl::impl::cpu::x64::cpu_isa_t getIsa() { return m_isa; } + + size_t getVectorLen() { return vlen; } void uni_vfmsub132ps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op); @@ -31,14 +44,24 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { void uni_vpaddd(const Xbyak::Ymm& vDst, const Xbyak::Ymm& vSrc, const Xbyak::Operand& op); + void uni_vpaddq(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op); + void uni_vpsubd(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op) { jit_generator::uni_vpsubd(vDst, vSrc, op); } void uni_vpsubd(const Xbyak::Ymm& vDst, const Xbyak::Ymm& vSrc, const Xbyak::Operand& op); + void uni_vsubpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src, const Xbyak::Operand& op); + + void uni_vmulpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src, const Xbyak::Operand& op); + + void uni_vpmuludq(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& op_1, const Xbyak::Operand& op_2); + void uni_vdivps(const Xbyak::Xmm& vDst, const Xbyak::Operand& op1, const Xbyak::Operand& op2); + void uni_vdivpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src, const Xbyak::Operand& op2); + void uni_vandps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op); void uni_vandnps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op); @@ -63,6 +86,18 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { void uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand &op); + void uni_vpbroadcastq(const Xbyak::Xmm &x, const Xbyak::Operand &op); + + void uni_vroundpd(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op, const uint8_t imm); + + void uni_vcvtdq2pd(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op); + + void uni_vcvtpd2dq(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op); + + void uni_vpmovzxdq(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op); + + void uni_vshufpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_srs, const Xbyak::Operand& op, uint8_t imm); + void gatherdd(const Xbyak::Xmm& vDst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Xmm& vSrcShift, @@ -140,7 +175,9 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { return dnnl::impl::cpu::x64::mayiuse(isa); } + const dnnl::impl::cpu::x64::cpu_isa_t m_isa; RegistersPool::Ptr registersPool; + size_t vlen; enum { // Comparison predicate operand (immediate byte) for single-precision floating-point values. @@ -155,5 +192,70 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { }; }; +template +class JitKernel : public JitKernelBase { +public: + using KernelFunc = void (*)(const CallArgs *); + + explicit JitKernel(const char* name, const CompileParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t max_cpu_isa) + : JitKernelBase{name, max_cpu_isa}, m_jcp{jcp}, m_func{nullptr} {} + + ~JitKernel() override = default; + + dnnl::impl::status_t create_kernel() override { + const dnnl::impl::status_t code = jit_generator::create_kernel(); + if (code != dnnl::impl::status::success) { + OPENVINO_THROW("Could not create kernel. Error code: ", std::to_string(code), ". ", + "Xbyak error code: ", Xbyak::ConvertErrorToString(Xbyak::GetError())); + } + m_func = (decltype(m_func))jit_ker(); + return code; + } + + void operator()(const CallArgs* args) const { + assert(m_func); + m_func(args); + } + + void operator()(const CallArgs& args) const { + this->operator()(&args); + } + + template class KernelT> + static std::shared_ptr> createInstance(const CompileParams& jcp) { + std::shared_ptr> res; + + try { +#define IF_ISA_CASE(ISA) \ + if (dnnl::impl::cpu::x64::mayiuse(ISA)) \ + res.reset(new KernelT(jcp)); \ + else + + IF_ISA_CASE(dnnl::impl::cpu::x64::avx512_core) + IF_ISA_CASE(dnnl::impl::cpu::x64::avx2) + IF_ISA_CASE(dnnl::impl::cpu::x64::sse41); + +#undef IF_ISA_CASE + + if (res) { + res->create_kernel(); + } + } catch (...) { + return nullptr; + } + + return res; + } + +protected: + CompileParams m_jcp; + +private: + KernelFunc m_func; +}; + +#endif // OPENVINO_ARCH_X86_64 + +} // namespace kernel } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp new file mode 100644 index 00000000000000..301c2f7e08ff69 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp @@ -0,0 +1,635 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" + +using namespace dnnl::impl::cpu; + +namespace ov { +namespace intel_cpu { +namespace kernel { + +#define GET_OFF(field) offsetof(RandomUniformCallArgs, field) + +template +RandomUniform::RandomUniform(const RandomUniformCompileParams& jcp) : + JitKernel(jit_name(), jcp, isa) { +} + +template +void RandomUniform::generate() { + this->preamble(); + registersPool = RegistersPool::create(isa, {rax, rcx, rsp, rdi, k0}); + + r64_dst = getReg64(); + r64_work_amount = getReg64(); + + mov(r64_work_amount, ptr[r64_params + GET_OFF(work_amount)]); + mov(r64_dst, ptr[r64_params + GET_OFF(dst_ptr)]); + + initVectors(); + process(); + + registersPool.reset(); + this->postamble(); +} + +template <> +void RandomUniform::initVectors() { + const auto r64_aux = getReg64(); + const auto r32_aux = Xbyak::Reg32(r64_aux.getIdx()); + const auto r16_aux = Xbyak::Reg16(r64_aux.getIdx()); + + v_max_mul_n_64 = getVmm(); + v_max_mul_c_64 = getVmm(); + v_add_low_k = getVmm(); + v_add_up_k = getVmm(); + v_n_inc = getVmm(); + v_range = getVmm(); + v_min = getVmm(); + v_key_64 = getVmm(); + v_counter_64 = getVmm(); + v_n_64 = getVmm(); + v_res_perm = getVmm(); + + if (m_jcp.out_data_type.is_real()) { + v_convert_0 = getVmm(); + v_convert_1 = getVmm(); + } + + // Initialize constants. +#define BROADCAST_R(F, V, R, C) \ + mov(R, C); \ + F(V, R); +#define BROADCAST_P(F, V, R, C) \ + mov(R, ptr[r64_params + GET_OFF(C)]); \ + F(V, ptr[R]); + + BROADCAST_R(vpbroadcastq, v_max_mul_n_64, r64_aux, STATISTIC_MAXIMIZING_MULTIPLIER_N) + BROADCAST_R(vpbroadcastq, v_max_mul_c_64, r64_aux, STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER) + BROADCAST_R(vpbroadcastd, v_add_low_k, r32_aux, CRUSH_RESISTANCE_CONST_LOWER_VALUE) + BROADCAST_R(vpbroadcastd, v_add_up_k, r32_aux, CRUSH_RESISTANCE_CONST_UPPER_VALUE) + BROADCAST_R(vpbroadcastq, v_n_inc, r64_aux, 0x00000008) + + if (m_jcp.out_data_type == element::f32) { + BROADCAST_R(vpbroadcastd, v_convert_0, r32_aux, 0x3f800000) + BROADCAST_R(vpbroadcastd, v_convert_1, r32_aux, 0x007fffff) + BROADCAST_P(vpbroadcastd, v_range, r64_aux, range_ptr) + BROADCAST_P(vpbroadcastd, v_min, r64_aux, min_ptr) + } else if (m_jcp.out_data_type == element::f16 && x64::mayiuse(x64::avx512_core_fp16)) { + BROADCAST_R(vpbroadcastw, v_convert_0, r16_aux, 0x3c00) + BROADCAST_R(vpbroadcastw, v_convert_1, r16_aux, 0x03ff) + BROADCAST_P(vpbroadcastw, v_range, r64_aux, range_ptr) + BROADCAST_P(vpbroadcastw, v_min, r64_aux, min_ptr) + } else if (m_jcp.out_data_type == element::bf16 && x64::mayiuse(x64::avx512_core_bf16)) { + v_convert_2 = getVmm(); + const auto ymm_min = Xbyak::Ymm(v_min.getIdx()); + const auto ymm_range = Xbyak::Ymm(v_range.getIdx()); + + BROADCAST_R(vpbroadcastw, v_convert_0, r16_aux, 0x3f80) + BROADCAST_R(vpbroadcastw, v_convert_1, r16_aux, 0x007f) + BROADCAST_R(vpbroadcastd, v_convert_2, r32_aux, 0x3f800000) + + BROADCAST_P(vpbroadcastw, v_range, r64_aux, range_ptr) + vpmovzxwd(v_range, ymm_range); + uni_vpslld(v_range, v_range, 16); + + BROADCAST_P(vpbroadcastw, v_min, r64_aux, min_ptr) + vpmovzxwd(v_min, ymm_min); + uni_vpslld(v_min, v_min, 16); + } else if (m_jcp.out_data_type == element::i32) { + const auto ymm_range = Xbyak::Ymm(v_range.getIdx()); + + BROADCAST_P(vpbroadcastd, v_range, r64_aux, range_ptr) + BROADCAST_P(vpbroadcastd, v_min, r64_aux, min_ptr) + + uni_vcvtdq2pd(v_range, ymm_range); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + + // Initialize inputs. + BROADCAST_P(vpbroadcastq, v_key_64, r64_aux, key_ptr) + BROADCAST_P(vpbroadcastq, v_counter_64, r64_aux, counter_ptr) + BROADCAST_P(vpbroadcastq, v_n_64, r64_aux, n_ptr) + + if (m_jcp.out_data_type.size() <= 4) { + static const uint64_t n_inc_arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } else { + static const uint64_t n_inc_arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; // TODO: i64 + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } + uni_vpaddq(v_n_64, v_n_64, ptr[r64_aux]); + + // Initialize auxiliary vectors. + static const uint32_t res_perm_mask[16] = { 0b00000000, 0b00010000, 0b00001000, 0b00011000, 0b00000010, 0b00010010, 0b00001010, 0b00011010, + 0b00000100, 0b00010100, 0b00001100, 0b00011100, 0b00000110, 0b00010110, 0b00001110, 0b00011110 }; + mov(r64_aux, reinterpret_cast(res_perm_mask)); + uni_vmovups(v_res_perm, ptr[r64_aux]); + + if (m_jcp.out_data_type == element::f16 && x64::mayiuse(x64::avx512_core_fp16)) { + v_perm_16 = getVmm(); + static const uint16_t perm_16[32] = { 0b00000000, 0b00000010, 0b00000100, 0b00000110, 0b00001000, 0b00001010, 0b00001100, 0b00001110, + 0b00010000, 0b00010010, 0b00010100, 0b00010110, 0b00011000, 0b00011010, 0b00011100, 0b00011110, + 0b00100000, 0b00100010, 0b00100100, 0b00100110, 0b00101000, 0b00101010, 0b00101100, 0b00101110, + 0b00110000, 0b00110010, 0b00110100, 0b00110110, 0b00111000, 0b00111010, 0b00111100, 0b00111110 }; + mov(r64_aux, reinterpret_cast(perm_16)); + uni_vmovups(v_perm_16, ptr[r64_aux]); + } + +#undef BROADCAST_R +#undef BROADCAST_P +} + +template // Works for AVX2, SSE41 +void RandomUniform::initVectors() { + const auto r64_aux = getReg64(); + + v_max_mul_n_64 = getVmm(); + v_max_mul_c_64 = getVmm(); + v_add_low_k = getVmm(); + v_add_up_k = getVmm(); + v_range = getVmm(); + v_key_64 = getVmm(); + v_counter_64 = getVmm(); + v_n_64 = getVmm(); + + r64_n_inc = getReg64(); + r64_min = getReg64(); + +#define INIT_ARR(A, V, R, T) \ + static const T A[8] = { V, V, V, V, V, V, V, V }; \ + if (isa == x64::avx2) { \ + mov(R, reinterpret_cast(A)); \ + } else { \ + static const T* A##_aligned = A + (reinterpret_cast(A) % 16) / sizeof(T); \ + mov(R, reinterpret_cast(A##_aligned)); \ + } + + // Initialize constants. + INIT_ARR(max_mul_n_64, STATISTIC_MAXIMIZING_MULTIPLIER_N, r64_aux, uint64_t); + uni_vmovups(v_max_mul_n_64, ptr[r64_aux]); + + INIT_ARR(max_mul_c_64, STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER, r64_aux, uint64_t); + uni_vmovups(v_max_mul_c_64, ptr[r64_aux]); + + INIT_ARR(add_low_k, CRUSH_RESISTANCE_CONST_LOWER_VALUE, r64_aux, uint32_t); + uni_vmovups(v_add_low_k, ptr[r64_aux]); + + INIT_ARR(add_up_k, CRUSH_RESISTANCE_CONST_UPPER_VALUE, r64_aux, uint32_t); + uni_vmovups(v_add_up_k, ptr[r64_aux]); + + INIT_ARR(n_inc_step, isa == x64::avx2 ? 4 : 2, r64_n_inc, uint64_t); + + if (m_jcp.out_data_type == element::f32) { + r64_convert_0 = getReg64(); + r64_convert_1 = getReg64(); + + INIT_ARR(convert_0, 0x3f800000, r64_convert_0, uint32_t); + INIT_ARR(convert_1, 0x007fffff, r64_convert_1, uint32_t); + + mov(r64_aux, ptr[r64_params + GET_OFF(range_ptr)]); + uni_vpbroadcastd(v_range, ptr[r64_aux]); + + auto v_aux = getVmm(); + mov(r64_aux, ptr[r64_params + GET_OFF(min_ptr)]); + uni_vpbroadcastd(v_aux, ptr[r64_aux]); + static uint32_t min_arr[8]; + mov(r64_min, reinterpret_cast(min_arr)); + uni_vmovups(ptr[r64_min], v_aux); + } else if (m_jcp.out_data_type == element::i32) { + r64_f64_pow_52 = getReg64(); + const auto v_aux = getVmm(); + const auto xmm_range = Xbyak::Xmm(v_range.getIdx()); + + INIT_ARR(f64_pow_52, 0x4330000000000000, r64_f64_pow_52, uint64_t); + + mov(r64_aux, ptr[r64_params + GET_OFF(range_ptr)]); + uni_vpbroadcastd(v_range, ptr[r64_aux]); + + mov(r64_aux, ptr[r64_params + GET_OFF(min_ptr)]); + uni_vpbroadcastd(v_aux, ptr[r64_aux]); + static uint32_t min_arr[8]; + mov(r64_min, reinterpret_cast(min_arr)); + uni_vmovups(ptr[r64_min], v_aux); + + uni_vcvtdq2pd(v_range, xmm_range); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + + // Initialize inputs. + mov(r64_aux, ptr[r64_params + GET_OFF(key_ptr)]); + uni_vpbroadcastq(v_key_64, ptr[r64_aux]); + + mov(r64_aux, ptr[r64_params + GET_OFF(counter_ptr)]); + uni_vpbroadcastq(v_counter_64, ptr[r64_aux]); + + mov(r64_aux, ptr[r64_params + GET_OFF(n_ptr)]); + uni_vpbroadcastq(v_n_64, ptr[r64_aux]); + + if (m_jcp.out_data_type.size() <= 4) { + if (isa == x64::avx2) { + static const uint64_t n_inc_arr[4] = { 0, 1, 2, 3 }; + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } else { + static uint64_t n_inc_arr[4]; + static uint64_t* n_inc_arr_aligned = n_inc_arr + (reinterpret_cast(n_inc_arr) % 16) / sizeof(uint64_t); + n_inc_arr_aligned[0] = 0; + n_inc_arr_aligned[1] = 1; + mov(r64_aux, reinterpret_cast(n_inc_arr_aligned)); + } + } else { + static const uint64_t n_inc_arr[4] = { 0, 1, 2, 3 }; // TODO: i64 + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } + + uni_vpaddq(v_n_64, v_n_64, ptr[r64_aux]); + +#undef INIT_ARR +} + +template +void RandomUniform::process() { + auto v_dst_0 = getVmm(); + auto v_dst_1 = getVmm(); + std::vector v_res{ v_dst_0, v_dst_1 }; + + auto step = vlen; + if (one_of(m_jcp.out_data_type.size(), 2lu, 4lu)) { + step = vlen * 2 / sizeof(uint32_t); + } else if (m_jcp.out_data_type.size() == 8) { + step = vlen / sizeof(uint32_t); + } + + Xbyak::Label l_loop, l_tail; + L(l_loop); { + cmp(r64_work_amount, step); + jl(l_tail, T_NEAR); + + runPhilox(v_res, v_key_64, v_counter_64, v_n_64); + convert(v_res, v_res); + + uni_vmovups(ptr[r64_dst], v_dst_0); + add(r64_dst, vlen); + if (one_of(m_jcp.out_data_type.size(), 4lu, 8lu)) { + uni_vmovups(ptr[r64_dst], v_dst_1); + add(r64_dst, vlen); + } + + if (isa == x64::avx512_core) { + uni_vpaddd(v_n_64, v_n_64, v_n_inc); + } else { + uni_vpaddd(v_n_64, v_n_64, ptr[r64_n_inc]); + } + + sub(r64_work_amount, step); + jmp(l_loop, T_NEAR); + } + + L(l_tail); + tail(v_res); +} + +template +void RandomUniform::calculateRound(const Vmm& vmm_k_0, const Vmm& vmm_k_1, const Vmm& vmm_c_0, const Vmm& vmm_c_1, + const Vmm& vmm_n_0, const Vmm& vmm_n_1, const Vmm& vmm_aux_0, const Vmm& vmm_aux_1) { + uni_vpmuludq(vmm_aux_0, vmm_n_0, v_max_mul_n_64); // {p0,p1,p0,p1} = {n0,_,n0,_} * {m0,_,m0,_} + uni_vpmuludq(vmm_aux_1, vmm_c_0, v_max_mul_c_64); // {r0,r1,r0,r1} = {c0,_,c0,_} * {m0,_,m0,_} + + uni_vpshufd(vmm_c_0, vmm_aux_0, 0b10110001); // {p1,p0,p1,p0} = shuf {p0,p1,p0,p1} + uni_vxorps(vmm_c_0, vmm_c_0, vmm_c_1); // {c0,_,c0,_} = {p1,_,p1,_} ^ {c1,_,c1,_} + uni_vxorps(vmm_c_0, vmm_c_0, vmm_k_1); // {c0,_,c0,_} = {c0,_,c0,_} ^ {k1,_,k1,_} + + uni_vpshufd(vmm_n_0, vmm_aux_1, 0b10110001); // {r1,r0,r1,r0} = shuf {r0,r1,r0,r1} + uni_vxorps(vmm_n_0, vmm_n_0, vmm_n_1); // {n0,_,n0,_} = {r1,_,r1,_} ^ {n1,_,n1,_} + uni_vxorps(vmm_n_0, vmm_n_0, vmm_k_0); // {n0,_,n0,_} = {n0,_,n0,_} ^ {k0,_,k0,_} +} + +template +void RandomUniform::runPhilox(const std::vector& vmm_dst, const Vmm& vmm_key, const Vmm& vmm_counter, const Vmm& vmm_n) { + auto vmm_k_0 = getVmm(); + auto vmm_k_1 = getVmm(); + auto vmm_n_0 = getVmm(); + auto vmm_n_1 = vmm_dst[0]; + auto vmm_c_0 = getVmm(); + auto vmm_c_1 = getVmm(); + auto vmm_aux_0 = getVmm(); + auto vmm_aux_1 = vmm_dst[1]; + + uni_vmovups(vmm_k_0, vmm_key); // {k0,k1,k0,k1} -> {k0,_,k0,_} + uni_vpshufd(vmm_k_1, vmm_key, 0b10110001); // {k0,k1,k0,k1} -> {k1,_,k1,_} + + uni_vpmuludq(vmm_aux_0, vmm_n, v_max_mul_n_64); // {p0,p1,p0,p1} = {n0,_,n0,_} * {m0,_,m0,_} + uni_vpmuludq(vmm_aux_1, vmm_counter, v_max_mul_c_64); // {r0,r1,r0,r1} = {c0,_,c0,_} * {m0,_,m0,_} + + uni_vxorps(vmm_c_0, vmm_aux_0, vmm_counter); // {_,c0,_,c0} = {_,p1,_,p1} ^ {_,c1,_,c1} + uni_vxorps(vmm_c_0, vmm_c_0, vmm_key); // {_,c0,_,c0} = {_,c0,_,c0} ^ {_,k1,_,k1} + uni_vpshufd(vmm_c_0, vmm_c_0, 0b10110001); // {_,c0,_,c0} -> {c0,_,c0,_} + + uni_vxorps(vmm_n_0, vmm_aux_1, vmm_n); // {_,n0,_,n0} = {_,r1,_,r1} ^ {_,n1,_,n1} + uni_vpshufd(vmm_n_0, vmm_n_0, 0b10110001); // {_,n0,_,n0} -> {n0,_,n0,_} + uni_vxorps(vmm_n_0, vmm_n_0, vmm_key); // {n0,_,n0,_} = {n0,_,n0,_} ^ {k0,_,k0,_} + + for (size_t i = 0lu; i < ROUNDS_NUMBER - 1; i++) { + raiseKey(vmm_k_0, vmm_k_1); + + std::swap(vmm_c_1, vmm_aux_0); + std::swap(vmm_n_1, vmm_aux_1); + calculateRound(vmm_k_0, vmm_k_1, vmm_c_0, vmm_c_1, vmm_n_0, vmm_n_1, vmm_aux_0, vmm_aux_1); + } + std::swap(vmm_c_1, vmm_aux_0); + std::swap(vmm_n_1, vmm_aux_1); + + if (isa == x64::avx512_core) { + vpermt2d(vmm_n_0, v_res_perm, vmm_n_1); // {n0,n1,n0,n1} = perm {n0,_,n0,_} {n1,_,n1,_} + vpermt2d(vmm_c_0, v_res_perm, vmm_c_1); // {c0,c1,c0,c1} = perm {c0,_,c0,_} {c1,_,c1,_} + vshufpd(vmm_dst[0], vmm_n_0, vmm_c_0, 0b00000000); // {n0,n1,c0,c1} = shuf {n0,n1,n0,n1} {c0,c1,c0,c1} + vshufpd(vmm_dst[1], vmm_n_0, vmm_c_0, 0b11111111); // {n0,n1,c0,c1} = shuf {n0,n1,n0,n1} {c0,c1,c0,c1} + } else if (isa == x64::avx2) { + auto ymm_dst_0 = Xbyak::Ymm(vmm_dst[0].getIdx()); + auto ymm_dst_1 = Xbyak::Ymm(vmm_dst[1].getIdx()); + auto ymm_c_0 = Xbyak::Ymm(vmm_c_0.getIdx()); + + uni_vshufps(vmm_n_0, vmm_n_0, vmm_n_1, 0b10001000); // {n0,n0,n1,n1} = shuf {n0,_,n0,_} {n1,_,n1,_} + uni_vshufps(vmm_c_0, vmm_c_0, vmm_c_1, 0b10001000); // {c0,c0,c1,c1} = shuf {c0,_,c0,_} {c1,_,c1,_} + uni_vshufps(ymm_dst_1, vmm_n_0, vmm_c_0, 0b10001000); // {n0,n1,c0,c1} = shuf {n0,n0,n1,n1} {c0,c0,c1,c1} + uni_vshufps(vmm_c_0, vmm_n_0, vmm_c_0, 0b11011101); // {n0,n1,c0,c1} = shuf {n0,n0,n1,n1} {c0,c0,c1,c1} + vperm2f128(ymm_dst_0, ymm_dst_1, ymm_c_0, 0b00100000); + vperm2f128(ymm_dst_1, ymm_dst_1, ymm_c_0, 0b00110001); + } else { + uni_vshufps(vmm_n_0, vmm_n_0, vmm_n_1, 0b10001000); + uni_vshufps(vmm_c_0, vmm_c_0, vmm_c_1, 0b10001000); + uni_vshufps(vmm_dst[0], vmm_n_0, vmm_c_0, 0b10001000); + uni_vshufps(vmm_dst[1], vmm_n_0, vmm_c_0, 0b11011101); + } +} + +template +void RandomUniform::raiseKey(const Vmm& vmm_k_0, const Vmm& vmm_k_1) { + uni_vpaddd(vmm_k_0, vmm_k_0, v_add_low_k); // {k0,_,k0,_} + {l0,_,l0,_} + uni_vpaddd(vmm_k_1, vmm_k_1, v_add_up_k); // {k1,_,k1,_} + {u0,_,u0,_} +} + +template <> +void RandomUniform::convert(const std::vector& v_dst, const std::vector& v_src) { + if (m_jcp.out_data_type.size() == 4) { + for (size_t i = 0lu; i < v_src.size(); i++) { + const auto& vmm_src = v_src[i]; + const auto& vmm_dst = v_dst[i]; + + if (m_jcp.out_data_type == element::f32) { + uni_vandps(vmm_dst, vmm_src, v_convert_1); + uni_vorps(vmm_dst, vmm_dst, v_convert_0); + uni_vsubps(vmm_dst, vmm_dst, v_convert_0); + vfmadd132ps(vmm_dst, v_min, v_range); + } else if (m_jcp.out_data_type == element::i32) { + // x % (max - min) + min + const auto v_aux_0 = getVmm(); + const auto v_aux_1 = getVmm(); + const auto ymm_src = Xbyak::Ymm(vmm_src.getIdx()); + const auto ymm_dst = Xbyak::Ymm(vmm_dst.getIdx()); + const auto ymm_aux_1 = Xbyak::Ymm(v_aux_1.getIdx()); + + // Divide in the f64 due to the f32 loses accuracy here. + vcvtudq2pd(v_aux_0, ymm_src); + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + + vextractf64x4(ymm_dst, vmm_src, 1); + vcvtudq2pd(v_aux_0, ymm_dst); + uni_vcvtpd2dq(ymm_dst, v_aux_1); + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + uni_vcvtpd2dq(ymm_aux_1, v_aux_1); + vshuff64x2(vmm_dst, vmm_dst, v_aux_1, 0b01000100); + + uni_vpaddd(vmm_dst, vmm_dst, v_min); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + } + } else if (m_jcp.out_data_type.size() == 2) { + if (m_jcp.out_data_type == element::f16 && x64::mayiuse(x64::avx512_core_fp16)) { + const auto& vmm_dst = v_dst[0]; + + if (v_src[0].getIdx() != vmm_dst.getIdx()) { + uni_vmovups(vmm_dst, v_src[0]); + } + vpermt2w(vmm_dst, v_perm_16, v_src[1]); + + uni_vandps(vmm_dst, vmm_dst, v_convert_1); + uni_vorps(vmm_dst, vmm_dst, v_convert_0); + vsubph(vmm_dst, vmm_dst, v_convert_0); + vfmadd132ph(vmm_dst, v_min, v_range); + } else if (m_jcp.out_data_type == element::bf16 && x64::mayiuse(x64::avx512_core_bf16)) { + for (size_t i = 0lu; i < v_src.size(); i++) { + const auto& vmm_dst = v_dst[i]; + + uni_vandps(vmm_dst, v_src[i], v_convert_1); + uni_vorps(vmm_dst, vmm_dst, v_convert_0); + uni_vpslld(vmm_dst, vmm_dst, 16); + + uni_vsubps(vmm_dst, vmm_dst, v_convert_2); + vfmadd132ps(vmm_dst, v_min, v_range); + } + + vcvtne2ps2bf16(v_dst[0], v_dst[1], v_dst[0]); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + } else if (m_jcp.out_data_type.size() == 8) { + if (m_jcp.out_data_type == element::i64) { + // TODO: in scope of i64 enabling. + } + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } +} + +template // Works for AVX2, SSE41 +void RandomUniform::convert(const std::vector& v_dst, const std::vector& v_src) { + if (m_jcp.out_data_type.size() == 4) { + for (size_t i = 0lu; i < v_src.size(); i++) { + auto vmm_src = v_src[i]; + auto vmm_dst = v_dst[i]; + + if (m_jcp.out_data_type == element::f32) { + uni_vandps(vmm_dst, vmm_src, ptr[r64_convert_1]); + uni_vorps(vmm_dst, vmm_dst, ptr[r64_convert_0]); + uni_vsubps(vmm_dst, vmm_dst, ptr[r64_convert_0]); + if (isa == x64::avx2) { + vfmadd213ps(vmm_dst, v_range, ptr[r64_min]); + } else { + uni_vmulps(vmm_dst, vmm_dst, v_range); + uni_vaddps(vmm_dst, vmm_dst, ptr[r64_min]); + } + } else if (m_jcp.out_data_type == element::i32) { + // x % (max - min) + min + const auto v_aux_0 = getVmm(); + const auto v_aux_1 = getVmm(); + const auto xmm_dst = Xbyak::Xmm(vmm_dst.getIdx()); + const auto ymm_dst = Xbyak::Ymm(vmm_dst.getIdx()); + const auto xmm_aux_1 = Xbyak::Xmm(v_aux_1.getIdx()); + + // Convert u32->f64. TODO: move to convert emitter after i64 enabling. + uni_vpmovzxdq(v_aux_0, xmm_dst); + uni_vorps(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + uni_vsubpd(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + + // Divide in the f64 due to the f32 loses accuracy here. + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + if (isa == x64::avx2) { + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + } else { + uni_vmulpd(v_aux_1, v_aux_1, v_range); + uni_vsubpd(v_aux_0, v_aux_0, v_aux_1); + uni_vmovups(v_aux_1, v_aux_0); + } + + if (isa == x64::avx2) { + vperm2f128(ymm_dst, ymm_dst, ymm_dst, 0b00000001); + } else { + uni_vshufpd(vmm_dst, vmm_dst, vmm_dst, 0b00000001); + } + // Convert u32->f64. TODO: move to convert emitter after i64 enabling. + uni_vpmovzxdq(v_aux_0, xmm_dst); + uni_vorps(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + uni_vsubpd(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + + uni_vcvtpd2dq(xmm_dst, v_aux_1); + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + if (isa == x64::avx2) { + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + } else { + uni_vmulpd(v_aux_1, v_aux_1, v_range); + uni_vsubpd(v_aux_0, v_aux_0, v_aux_1); + uni_vmovups(v_aux_1, v_aux_0); + } + uni_vcvtpd2dq(xmm_aux_1, v_aux_1); + if (isa == x64::avx2) { + vperm2f128(ymm_dst, ymm_dst, v_aux_1, 0b00100000); + } else { + uni_vshufpd(vmm_dst, vmm_dst, v_aux_1, 0b00000000); + } + + uni_vpaddd(vmm_dst, vmm_dst, ptr[r64_min]); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + } + } else if (m_jcp.out_data_type.size() == 8) { + if (m_jcp.out_data_type == element::i64) { + // TODO: in scope of i64 enabling. + } + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } +} + +template <> +void RandomUniform::tail(const std::vector& vmm_dst) { + Xbyak::Label l_end; + const auto k_rest_mask = getMask(); + + cmp(r64_work_amount, 0); + jle(l_end, T_NEAR); + + runPhilox(vmm_dst, v_key_64, v_counter_64, v_n_64); + convert(vmm_dst, vmm_dst); + + if (m_jcp.out_data_type.size() == 4) { + Xbyak::Label l_0; + const auto step = vlen / sizeof(uint32_t); + + cmp(r64_work_amount, step); + jl(l_0, T_NEAR); + + uni_vmovups(ptr[r64_dst], vmm_dst[0]); + add(r64_dst, vlen); + sub(r64_work_amount, step); + fillRestWorkMask(k_rest_mask, r64_work_amount); + uni_vmovups(ptr[r64_dst] | k_rest_mask, vmm_dst[1]); + jmp(l_end, T_NEAR); + + L(l_0); + fillRestWorkMask(k_rest_mask, r64_work_amount); + uni_vmovups(ptr[r64_dst] | k_rest_mask, vmm_dst[0]); + } else if (m_jcp.out_data_type.size() == 2) { + fillRestWorkMask(k_rest_mask, r64_work_amount); + vmovdqu16(ptr[r64_dst] | k_rest_mask, vmm_dst[0]); + } + + L(l_end); +} + +template <> +void RandomUniform::tail(const std::vector& vmm_dst) { + Xbyak::Label l_0, l_end; + const auto step = vlen / sizeof(uint32_t); + + cmp(r64_work_amount, 0); + jle(l_end, T_NEAR); + + runPhilox(vmm_dst, v_key_64, v_counter_64, v_n_64); + convert(vmm_dst, vmm_dst); + const auto v_rest_mask = getVmm(); + + cmp(r64_work_amount, step); + jl(l_0, T_NEAR); + + uni_vmovups(ptr[r64_dst], vmm_dst[0]); + add(r64_dst, vlen); + sub(r64_work_amount, step); + fillRestWorkMask(v_rest_mask, r64_work_amount, m_jcp.out_data_type.size()); + vmaskmovps(ptr[r64_dst], v_rest_mask, vmm_dst[1]); + jmp(l_end, T_NEAR); + + L(l_0); + fillRestWorkMask(v_rest_mask, r64_work_amount, m_jcp.out_data_type.size()); + vmaskmovps(ptr[r64_dst], v_rest_mask, vmm_dst[0]); + + L(l_end); +} + +template +void RandomUniform::tail(const std::vector& vmm_dst) { + Xbyak::Label l_0, l_end; + const auto step = vlen / sizeof(uint32_t); + + cmp(r64_work_amount, 0); + jle(l_end, T_NEAR); + + runPhilox(vmm_dst, v_key_64, v_counter_64, v_n_64); + convert(vmm_dst, vmm_dst); + + cmp(r64_work_amount, step); + jl(l_0, T_NEAR); + + uni_vmovups(ptr[r64_dst], vmm_dst[0]); + add(r64_dst, vlen); + sub(r64_work_amount, step); + store(ptr[r64_dst], vmm_dst[1], r64_work_amount, m_jcp.out_data_type.size()); + jmp(l_end, T_NEAR); + + L(l_0); + store(ptr[r64_dst], vmm_dst[0], r64_work_amount, m_jcp.out_data_type.size()); + + L(l_end); +} + +template class RandomUniform; +template class RandomUniform; +template class RandomUniform; + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp new file mode 100644 index 00000000000000..366be4c3a132ce --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "jit_kernel_base.hpp" + +#if defined(OPENVINO_ARCH_X86_64) + +namespace ov { +namespace intel_cpu { +namespace kernel { + +struct RandomUniformCompileParams { + element::Type out_data_type = element::f32; +}; + +struct RandomUniformCallArgs { + void* dst_ptr; + const void* key_ptr; + const void* counter_ptr; + const void* n_ptr; + const void* min_ptr; + const void* range_ptr; + uint64_t work_amount = 0lu; +}; + +template +class RandomUniform : public JitKernel { +public: + DECLARE_CPU_JIT_AUX_FUNCTIONS(RandomUniform) + + explicit RandomUniform(const RandomUniformCompileParams& jcp); + + void generate() override; + +private: + using Vmm = typename dnnl::impl::utils::conditional3::type; + using Vmask = typename dnnl::impl::utils::conditional3::type; + + RegistersPool::Reg r64_dst; + RegistersPool::Reg r64_work_amount; + RegistersPool::Reg r64_n_inc; + RegistersPool::Reg r64_convert_0; + RegistersPool::Reg r64_convert_1; + RegistersPool::Reg r64_min; + RegistersPool::Reg r64_f64_pow_52; + + const Xbyak::Reg64 r64_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]); + + // Vector registers. + RegistersPool::Reg v_max_mul_n_64; + RegistersPool::Reg v_max_mul_c_64; + RegistersPool::Reg v_add_low_k; + RegistersPool::Reg v_add_up_k; + RegistersPool::Reg v_convert_0; + RegistersPool::Reg v_convert_1; + RegistersPool::Reg v_convert_2; + RegistersPool::Reg v_n_inc; + RegistersPool::Reg v_key_64; + RegistersPool::Reg v_counter_64; + RegistersPool::Reg v_n_64; + RegistersPool::Reg v_min; + RegistersPool::Reg v_range; + RegistersPool::Reg v_res_perm; + RegistersPool::Reg v_perm_16; + + void initVectors(); + + void process(); + + void runPhilox(const std::vector& vmm_res, const Vmm& vmm_key, const Vmm& vmm_counter, const Vmm& vmm_n); + + void calculateRound(const Vmm& vmm_k_0, const Vmm& vmm_k_1, const Vmm& vmm_c_0, const Vmm& vmm_c_1, + const Vmm& vmm_n_0, const Vmm& vmm_n_1, const Vmm& vmm_aux_0, const Vmm& vmm_aux_1); + + void raiseKey(const Vmm& vmm_k_0, const Vmm& vmm_k_1); + + void convert(const std::vector& vmm_dst, const std::vector& vmm_src); + + void tail(const std::vector& vmm_dst); + + static constexpr uint64_t ROUNDS_NUMBER = 10lu; + static constexpr uint32_t CRUSH_RESISTANCE_CONST_LOWER_VALUE = 0x9E3779B9; + static constexpr uint32_t CRUSH_RESISTANCE_CONST_UPPER_VALUE = 0xBB67AE85; + static constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_N = 0xD2511F53; + static constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER = 0xCD9E8D57; +}; + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov + +#endif // OPENVINO_ARCH_X86_64 diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index bfd1d8fa982901..d2a46ac97da017 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -626,7 +626,7 @@ void NonMaxSuppression::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16}; + const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16, Precision::FP16}; const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp index 4571eaa9e8c998..be2a8d894fc7f8 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp @@ -57,7 +57,7 @@ void NonZero::initSupportedPrimitiveDescriptors() { return; const auto &inPrc = getOriginalInputPrecisionAtPort(0); - if (!one_of(inPrc, Precision::FP32, Precision::BF16, Precision::I32, Precision::U32, Precision::I8, Precision::U8)) { + if (!one_of(inPrc, Precision::FP32, Precision::BF16, Precision::FP16, Precision::I32, Precision::U32, Precision::I8, Precision::U8)) { IE_THROW() << "Can't create primitive descriptor for NonZero layer with name: " << getName() << " doesn't support " << inPrc.name() << " precision on 0 port"; } @@ -123,6 +123,7 @@ void NonZero::execute(dnnl::stream strm) { OV_SWITCH(intel_cpu, NonZeroExecute, ctx, inputPrec, OV_CASE(Precision::FP32, float), OV_CASE(Precision::BF16, bfloat16_t), + OV_CASE(Precision::FP16, float16), OV_CASE(Precision::I32, int), OV_CASE(Precision::U32, uint32_t), OV_CASE(Precision::I8, int8_t), diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index c4fc60a9d9e855..12f3ecf397764a 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -796,10 +796,14 @@ void NormalizeL2::initSupportedPrimitiveDescriptors() { inputPrecision = outputPrecision = Precision::BF16; } - if (!one_of(inputPrecision, Precision::FP32, Precision::BF16, Precision::I8, Precision::U8)) { + if (one_of(Precision::FP16, inputPrecision, outputPrecision) && mayiuse(cpu::x64::sse41)) { + inputPrecision = outputPrecision = Precision::FP32; + } + + if (!one_of(inputPrecision, Precision::FP32, Precision::BF16, Precision::FP16, Precision::I8, Precision::U8)) { THROW_ERROR << "has unsupported input precision: " << inputPrecision; } - if (!one_of(outputPrecision, Precision::FP32, Precision::BF16, Precision::I8, Precision::U8)) { + if (!one_of(outputPrecision, Precision::FP32, Precision::BF16, Precision::FP16, Precision::I8, Precision::U8)) { THROW_ERROR << "has unsupported output precision: " << outputPrecision; } @@ -1483,7 +1487,8 @@ std::shared_ptr NormalizeL2::NormalizeL2Execut OV_CASE2(Precision::U8, Precision::FP32, uint8_t, float), OV_CASE2(Precision::I8, Precision::FP32, int8_t, float), OV_CASE2(Precision::FP32, Precision::FP32, float, float), - OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t)); + OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t), + OV_CASE2(Precision::FP16, Precision::FP16, float16_t, float16_t)); return ctx.executor; } diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 0bd6f3208c1e87..42aa97d062702b 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -271,14 +271,31 @@ void Pooling::getSupportedDescriptors() { const auto &childShape = getOutputShapeAtPort(0); const size_t inputRank = getInputShapeAtPort(0).getRank(); + if (isDynamicNode()) { + inShape = MemoryDescUtils::makeDummyShape(parentShape); + const auto& origDims = parentShape.getDims(); + const auto& origMaxDims = parentShape.getMaxDims(); + + auto inDims = inShape.getStaticDims(); + for (size_t i = 0; i < inDims.size() - 2; i++) { + if (origDims[i + 2] == Shape::UNDEFINED_DIM) { + inDims[i + 2] = std::min(origMaxDims[i + 2], std::max(inDims[i + 2], poolingAttrs.kernel[i])); + } + } + inShape = Shape(inDims); + } else { + inShape = parentShape; + } + #if defined(OV_CPU_WITH_ACL) // WA: we may specify any layout here (NCHW or NHWC) since both are supported by ACL - arm_compute::DataLayout dataLayout = (parentShape.getDims().size() == 5) ? arm_compute::DataLayout::NDHWC : arm_compute::DataLayout::NCHW; - arm_compute::TensorInfo srcTensorInfo = arm_compute::TensorInfo(shapeCast(parentShape.getDims()), + arm_compute::DataLayout dataLayout = (inShape.getDims().size() == 5) ? arm_compute::DataLayout::NDHWC : arm_compute::DataLayout::NCHW; + arm_compute::TensorInfo srcTensorInfo = arm_compute::TensorInfo(shapeCast(inShape.getDims()), 1, precisionToAclDataType(inputPrecision), dataLayout); - arm_compute::TensorInfo dstTensorInfo = arm_compute::TensorInfo(shapeCast(childShape.getDims()), + arm_compute::TensorInfo dstTensorInfo = arm_compute::TensorInfo(shapeCast(isDynamicNode() ? MemoryDescUtils::makeDummyShape(childShape).getDims() : + childShape.getDims()), 1, precisionToAclDataType(outputPrecision), dataLayout); @@ -287,16 +304,19 @@ void Pooling::getSupportedDescriptors() { useACL = AclPoolingExecutor::isSupported(srcTensorInfo, dstTensorInfo, poolingAttrs, - parentShape.getDims().size(), + inShape.getDims().size(), getOriginalOutputsNumber(), dataLayout, (getOriginalOutputsNumber() > 1) ? &getOutputShapeAtPort(1).getDims() : nullptr, &pool_info, - &pool3d_info); + &pool3d_info, + isDynamicNode()); //FIXME: 5D tensors case is not assigned to ACL because there is no way to check layout here //NEPooling3dLayer supports NDHWC only - if (parentShape.getDims().size() == 5) + if (inShape.getDims().size() == 5) { useACL = false; + DEBUG_LOG("FIXME: 5D tensors case is not assigned to ACL because there is no way to check layout in getSupportedDescriptors()"); + } #endif if (useACL) return; @@ -324,19 +344,7 @@ void Pooling::getSupportedDescriptors() { if ((inputRank < 3) || (inputRank > 5)) IE_THROW() << "Pooling layer. Unsupported mode. Only 3D, 4D and 5D blobs are supported as input."; - inShape = MemoryDescUtils::makeDummyShape(parentShape); - if (isDynamicNode()) { - const auto& origDims = parentShape.getDims(); - const auto& origMaxDims = parentShape.getMaxDims(); - auto inDims = inShape.getStaticDims(); - for (size_t i = 0; i < inDims.size() - 2; i++) { - if (origDims[i + 2] == Shape::UNDEFINED_DIM) { - inDims[i + 2] = std::min(origMaxDims[i + 2], std::max(inDims[i + 2], poolingAttrs.kernel[i])); - } - } - inShape = Shape(inDims); - } initEffectiveAttributes(inShape, MemoryDescUtils::makeDummyShape(childShape)); @@ -386,7 +394,12 @@ void Pooling::prepareParams() { } else { attr = initPrimitiveAttr(); } - + if (isDynamicNode()) { + if (poolingAttrs.auto_pad) { + poolingAttrs.data_pad_begin = shapeInference->get_pads_begin(); + poolingAttrs.data_pad_end = shapeInference->get_pads_end(); + } + } if (useACL) { auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); @@ -414,10 +427,6 @@ void Pooling::prepareParams() { auto outDesc = getChildEdgesAtPort(0)[0]->getMemory().getDescWithType(); if (isDynamicNode()) { - if (poolingAttrs.auto_pad) { - poolingAttrs.data_pad_begin = shapeInference->get_pads_begin(); - poolingAttrs.data_pad_end = shapeInference->get_pads_end(); - } initEffectiveAttributes(inDesc->getShape(), outDesc->getShape()); } @@ -593,18 +602,17 @@ void Pooling::initSupportedPrimitiveDescriptors() { config.inConfs.resize(getParentEdges().size()); config.outConfs.resize(getOriginalOutputsNumber()); - config.inConfs[0].setMemDesc( - creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getInputShapeAtPort(0))); - config.outConfs[0].setMemDesc( - creatorsMap.at(format)->createSharedDesc(getOriginalOutputPrecisionAtPort(0), getOutputShapeAtPort(0))); - std::vector srcMemoryDescs; - for (const auto& inConf : config.inConfs) { - srcMemoryDescs.push_back(inConf.getMemDesc()); + for (size_t i = 0; i < config.inConfs.size(); i++) { + config.inConfs[i].setMemDesc( + creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(i), getInputShapeAtPort(i))); + srcMemoryDescs.push_back(config.inConfs[i].getMemDesc()); } std::vector dstMemoryDescs; - for (const auto& outConf : config.outConfs) { - dstMemoryDescs.push_back(outConf.getMemDesc()); + for (size_t i = 0; i < config.outConfs.size(); i++) { + config.outConfs[i].setMemDesc( + creatorsMap.at(format)->createSharedDesc(getOriginalOutputPrecisionAtPort(i), getOutputShapeAtPort(i))); + dstMemoryDescs.push_back(config.outConfs[i].getMemDesc()); } auto factory = std::make_shared( diff --git a/src/plugins/intel_cpu/src/nodes/random_uniform.cpp b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp new file mode 100644 index 00000000000000..77d823710c942f --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp @@ -0,0 +1,532 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" + +#include "ie_parallel.hpp" +#include "ie_ngraph_utils.hpp" +#include +#include +#include "shape_inference/custom/random_uniform.hpp" + +namespace ov { +namespace intel_cpu { +namespace node { + +bool RandomUniform::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (op->get_type_info() != op::v8::RandomUniform::get_type_info_static()) { + errorMessage = "Only RandomUniform operation from the opset8 is supported by the CPU plugin."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +RandomUniform::RandomUniform(const std::shared_ptr& op, const GraphContext::CPtr& context) + : Node(op, context, RandomUniformShapeInferFactory(op)) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + THROW_CPU_NODE_ERR(errorMessage); + } + + // RandomUniform should generate new sequence each run even if all inputs are constants. So that method Node::IsConstant() + // doesn't return 'True' for RandomUniform with all constant inputs and the node generates new values for each inference, + // we set 'NoConst' value for 'ConstantType' in ctor. + constant = ConstantType::NoConst; + + auto rnd_op = as_type_ptr(op); + m_global_seed = rnd_op->get_global_seed(); + m_op_seed = rnd_op->get_op_seed(); + + m_output_prc = op->get_output_element_type(0); + + for (size_t i = 0lu; i < op->get_input_size(); i++) { + if (is_type(op->get_input_node_ptr(i))) { + m_const_inputs[i] = true; + } + } + + if (m_algo == STL) { + m_generator = std::default_random_engine{static_cast(m_op_seed)}; + } +} + +void RandomUniform::getSupportedDescriptors() { + if (getParentEdges().size() != 3) { + THROW_CPU_NODE_ERR("has incorrect number of input edges."); + } + if (getChildEdges().empty()) { + THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } +} + +void RandomUniform::initSupportedPrimitiveDescriptors() { + auto shape_prc = getOriginalInputPrecisionAtPort(SHAPE); + if (!one_of(shape_prc, InferenceEngine::Precision::I32, InferenceEngine::Precision::I64)) { + shape_prc = InferenceEngine::Precision::I32; + } + + auto out_prc = getOriginalOutputPrecisionAtPort(0); + if (out_prc.is_float() && ((m_algo == PHILOX && + !one_of(out_prc, InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16, InferenceEngine::Precision::BF16)) || + (m_algo == STL && !one_of(out_prc, InferenceEngine::Precision::FP32)))) { + out_prc = InferenceEngine::Precision::FP32; + } + if (!out_prc.is_float() && !one_of(out_prc, InferenceEngine::Precision::I32, InferenceEngine::Precision::I64)) { + out_prc = InferenceEngine::Precision::I32; + } + m_output_prc = InferenceEngine::details::convertPrecision(out_prc); + + addSupportedPrimDesc({{LayoutType::ncsp, shape_prc, m_const_inputs[SHAPE]}, + {LayoutType::ncsp, out_prc, m_const_inputs[MIN_VAL]}, + {LayoutType::ncsp, out_prc, m_const_inputs[MAX_VAL]}}, + {{LayoutType::ncsp, out_prc}}, + ref_any); +} + +void RandomUniform::createPrimitive() { + if (m_const_inputs[MIN_VAL]) { + initEdgeValues(m_min_val, getParentEdgeAt(MIN_VAL)->getMemoryPtr()->getData(), m_output_prc); + } + if (m_const_inputs[MAX_VAL]) { + initEdgeValues(m_max_val, getParentEdgeAt(MAX_VAL)->getMemoryPtr()->getData(), m_output_prc); + evalRange(); + } + + if (m_algo == PHILOX) { +#if defined(OPENVINO_ARCH_X86_64) + kernel::RandomUniformCompileParams jcp; + + jcp.out_data_type = m_output_prc; + + m_jit_kernel = kernel::JitKernel::createInstance(jcp); + + if (m_jit_kernel) { + if (auto selected_pd = getSelectedPrimitiveDescriptor()) { + using namespace dnnl::impl::cpu; + if (m_jit_kernel->getIsa() == x64::avx512_core) { + selected_pd->setImplementationType(jit_avx512); + } else if (m_jit_kernel->getIsa() == x64::avx2) { + selected_pd->setImplementationType(jit_avx2); + } else if (m_jit_kernel->getIsa() == x64::sse41) { + selected_pd->setImplementationType(jit_sse42); + } + } + } +#endif // OPENVINO_ARCH_X86_64 + } + + if (m_const_inputs[SHAPE]) { + Node::createPrimitive(); + } +} + +bool RandomUniform::needPrepareParams() const { + if (m_out_shape != getChildEdgeAt(0)->getMemoryPtr()->getShape().getStaticDims()) { + return true; + } + return false; +} + +void RandomUniform::prepareParams() { + m_out_shape = getChildEdgeAt(0)->getMemoryPtr()->getShape().getStaticDims(); + m_out_el_num = std::accumulate(m_out_shape.begin(), m_out_shape.end(), 1lu, std::multiplies()); + + if (m_algo == PHILOX) { + m_skip_count = m_out_el_num * SKIP_CONST; + + if (m_out_el_num < PHILOX_PARALLEL_EXECUTION_THRESHOLD) { + m_threads_num = 1; + } else { + m_threads_num = parallel_get_max_threads(); + } + m_thread_params.resize(m_threads_num); + + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { + auto& p = m_thread_params[ithr]; + uint64_t start = 0lu, end = 0lu; + + if (m_jit_kernel) { +#if defined(OPENVINO_ARCH_X86_64) + const auto block_size = (m_jit_kernel->getVectorLen() / m_output_prc.size()) * 2; + const auto blocks_num = (m_out_el_num + block_size - 1) / block_size; + const auto blocks_per_thr = (blocks_num + nthr - 1) / nthr; + + start = ithr * blocks_per_thr * block_size; + end = (ithr + 1) * blocks_per_thr * block_size; +#endif // OPENVINO_ARCH_X86_64 + } else { + const auto groups_num = (m_out_el_num + PHILOX_GROUP_SIZE - 1) / PHILOX_GROUP_SIZE; + const auto groups_per_thr = (groups_num + nthr - 1) / nthr; + + start = ithr * groups_per_thr * PHILOX_GROUP_SIZE; + end = (ithr + 1) * groups_per_thr * PHILOX_GROUP_SIZE; + + p.step = m_output_prc.size() > 4 ? 2 : 4; + } + + if (end > m_out_el_num) { + end = m_out_el_num; + } + if (start > end) { + start = end; + } + p.work_amount = end - start; + p.n_shift = start / PHILOX_GROUP_SIZE; + p.dst_shift = start * m_output_prc.size(); + }); + } +} + +void RandomUniform::execute(dnnl::stream strm) { + if (!m_const_inputs[MIN_VAL]) { + initEdgeValues(m_min_val, getParentEdgeAt(MIN_VAL)->getMemoryPtr()->getData(), m_output_prc); + if (m_const_inputs[MAX_VAL]) { + evalRange(); + } + } + if (!m_const_inputs[MAX_VAL]) { + initEdgeValues(m_max_val, getParentEdgeAt(MAX_VAL)->getMemoryPtr()->getData(), m_output_prc); + evalRange(); + } + + auto data = getChildEdgeAt(0)->getMemoryPtr()->getData(); + + if (m_algo == PHILOX) { + m_state = computePhilox(data, m_out_el_num, m_state); + } else if (m_algo == STL) { + computeStl(data, m_out_el_num); + } else { + THROW_CPU_NODE_ERR("unsupported algorithm."); + } +} + +void RandomUniform::executeDynamicImpl(dnnl::stream strm) { + execute(strm); +} + +////////////// PHILOX algo /////////////// + +namespace { +// Following const values are taken from the original paper: +// https://www.thesalmons.org/john/random123/papers/random123sc11.pdf +constexpr uint32_t CRUSH_RESISTANCE_CONST_LOWER_VALUE = 0x9E3779B9; +constexpr uint32_t CRUSH_RESISTANCE_CONST_UPPER_VALUE = 0xBB67AE85; +constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_N = 0xD2511F53; +constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER = 0xCD9E8D57; +constexpr uint64_t ROUNDS_NUMBER = 10llu; + +inline void calculateRound(const uint32_t* key, uint32_t* counter, uint32_t* n) { + uint64_t prod_0 = STATISTIC_MAXIMIZING_MULTIPLIER_N * n[0]; + uint64_t prod_1 = STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER * counter[0]; + n[0] = static_cast(prod_1 >> 32) ^ n[1] ^ key[0]; + n[1] = static_cast(prod_1); + counter[0] = static_cast(prod_0 >> 32) ^ counter[1] ^ key[1]; + counter[1] = static_cast(prod_0); +} + +inline void raiseKey(uint32_t* key) { + key[0] += CRUSH_RESISTANCE_CONST_LOWER_VALUE; + key[1] += CRUSH_RESISTANCE_CONST_UPPER_VALUE; +} + +inline void runPhilox(uint64_t key, uint64_t counter, uint64_t n, uint32_t* res) { + uint32_t* key_32 = reinterpret_cast(&key); + uint32_t* counter_32 = reinterpret_cast(&counter); + uint32_t* n_32 = reinterpret_cast(&n); + + for (size_t i = 0lu; i < ROUNDS_NUMBER; i++) { + calculateRound(key_32, counter_32, n_32); + if (i < ROUNDS_NUMBER - 1) + raiseKey(key_32); + } + + res[0] = n_32[0]; + res[1] = n_32[1]; + res[2] = counter_32[0]; + res[3] = counter_32[1]; +} + +inline void convertToOutputType(const uint32_t* in, + float min, + float range, + float* out, + size_t el_to_copy) { + RandomUniform::OutputType out_val; + + for (size_t i = 0lu; i < el_to_copy; i++) { + out_val.u32 = 0x3f800000 | (in[i] & 0x7fffffu); + out[i] = (out_val.f32 - 1.f) * range + min; + } +} + +inline void convertToOutputType(const uint32_t* in, + float16 min, + float16 range, + float16* out, + size_t el_to_copy) { + RandomUniform::OutputType out_val; + + for (size_t i = 0lu; i < el_to_copy; i++) { + uint16_t x_uint16 = static_cast(in[i]); + out_val.u16 = 0x3c00 | (x_uint16 & 0x03ffu); + out[i] = (out_val.f16 - static_cast(1)) * range + min; + } +} + +inline void convertToOutputType(const uint32_t* in, + bfloat16 min, + bfloat16 range, + bfloat16* out, + size_t el_to_copy) { + RandomUniform::OutputType out_val; + + for (size_t i = 0lu; i < el_to_copy; i++) { + uint16_t x_uint16 = static_cast(in[i]); + out_val.u16 = 0x3f80 | (x_uint16 & 0x7fu); + out[i] = (out_val.bf16 - static_cast(1)) * range + min; + } +} + +inline void convertToOutputType(const uint32_t* in, + int32_t min, + int32_t range, + int32_t* out, + size_t el_to_copy) { + for (size_t i = 0lu; i < el_to_copy; i++) { + out[i] = static_cast(in[i] % range + min); + } +} + +inline void convertToOutputType(const uint32_t* in, + int64_t min, + int64_t range, + int64_t* out, + size_t el_to_copy) { + for (size_t i = 0lu; i < el_to_copy; i++) { + out[i] = static_cast(((static_cast(in[i * 2]) << 32) + in[i * 2 + 1]) % range + min); + } +} + +} // namespace + +std::pair RandomUniform::computePhilox(void* out, size_t out_el_num, const std::pair& prev_state) { + // When both seed values are equal to zero RandomUniform should generate non-deterministic sequence. + if (m_global_seed == 0lu && m_op_seed == 0lu) { + std::srand(static_cast(std::time(nullptr))); + m_global_seed = std::rand(); + } + + uint64_t n_state = prev_state.first; + uint64_t counter_state = prev_state.second; + + uint64_t counter = counter_state > 0 ? counter_state : m_op_seed; + + auto out_u8 = reinterpret_cast(out); + + if (m_jit_kernel) { +#if defined(OPENVINO_ARCH_X86_64) + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { + auto& p = m_thread_params[ithr]; + if (p.work_amount == 0lu) { + return; + } + auto n = n_state + p.n_shift; + + kernel::RandomUniformCallArgs args; + + args.dst_ptr = (out_u8 + p.dst_shift); + args.key_ptr = &m_global_seed; + args.counter_ptr = &counter; + args.n_ptr = &n; + args.min_ptr = &m_min_val; + args.range_ptr = &m_range_val; + args.work_amount = p.work_amount; + + (*m_jit_kernel)(&args); + }); +#endif // OPENVINO_ARCH_X86_64 + } else { + auto threadBody = [&](const int ithr, const int nthr) { + auto& p = m_thread_params[ithr]; + if (p.work_amount == 0lu) { + return; + } + auto n = n_state + p.n_shift; + auto out_cur = out_u8 + p.dst_shift; + auto work_rest = static_cast(p.work_amount); + uint32_t res[4]; + +#define EXEC_CASE(P) \ + case element::P: { \ + auto out_t = reinterpret_cast::value_type *>(out_cur); \ + for (; work_rest > 0l; work_rest -= p.step, out_t += p.step) { \ + runPhilox(m_global_seed, counter, n, res); \ + auto el_to_copy = std::min(p.step, static_cast(work_rest)); \ + convertToOutputType(res, m_min_val.P, m_range_val.P, out_t, el_to_copy); \ + if (++n == 0) { \ + counter++; \ + } \ + } \ + } break; + + switch (m_output_prc) { + EXEC_CASE(f32) + EXEC_CASE(f16) + EXEC_CASE(bf16) + EXEC_CASE(i32) + EXEC_CASE(i64) + default: THROW_CPU_NODE_ERR("Unsupported type of RandomUniform: ", m_output_prc.to_string()); + } + +#undef EXEC_CASE + }; + + parallel_nt(m_threads_num, threadBody); + } + + // Calculate counter values for next RandomUniform run. + n_state += m_skip_count; + if (n_state < m_skip_count) { + counter_state++; + } + + return { n_state, counter_state }; +} + +////////////// STL algo /////////////// +void RandomUniform::computeStl(void* out, size_t work_amount) { + switch (m_output_prc) { + case element::f32: { + generateData>( + std::uniform_real_distribution{m_min_val.f32, m_max_val.f32}, out, work_amount); + } break; + case element::i32: { + generateData>( + std::uniform_int_distribution{m_min_val.i32, m_max_val.i32}, out, work_amount); + } break; + case element::i64: { + generateData>( + std::uniform_int_distribution{m_min_val.i64, m_max_val.i64}, out, work_amount); + } break; + default: + THROW_CPU_NODE_ERR("has unsupported output type: ", m_output_prc); + } +} + +template +void RandomUniform::generateData(DISTR_TYPE distribution, void* out, size_t work_amount) { + auto dst = reinterpret_cast(out); + for (size_t i = 0; i < work_amount; i++) { + *dst = distribution(m_generator); + dst++; + } +} +////////////////////////////////// + +void RandomUniform::initEdgeValues(OutputType& dst, const void* src, const element::Type& output_type) { +#define EL_CASE(E) \ + case element::E: \ + dst.E = *reinterpret_cast::value_type *>(src); \ + break; + + switch (output_type) { + EL_CASE(f32) + EL_CASE(f16) + EL_CASE(bf16) + EL_CASE(i32) + EL_CASE(i64) + EL_CASE(f64) + default: + THROW_CPU_NODE_ERR("has unsupported output precision: ", output_type); + } + +#undef EL_CASE +} + +void RandomUniform::evalRange() { +#define EL_CASE(E) \ + case element::E: \ + m_range_val.E = m_max_val.E - m_min_val.E; \ + break; + + switch (m_output_prc) { + EL_CASE(f32) + EL_CASE(f16) + EL_CASE(bf16) + EL_CASE(i32) + EL_CASE(i64) + EL_CASE(f64) + default: + THROW_CPU_NODE_ERR("has unsupported output precision: ", m_output_prc); + } + +#undef EL_CASE +} + +std::string RandomUniform::getPrimitiveDescriptorType() const { + auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor(); + + impl_desc_type type = impl_desc_type::undef; + if (selectedPrimitiveDesc) { + type = selectedPrimitiveDesc->getImplementationType(); + } + + std::string str_type; + + auto add_type = [&](std::string t) { + if (!str_type.empty() && t.c_str()[0] != '_') + str_type += "_"; + str_type += t; + }; + +#define SEARCH_TYPE(_type) \ + if ((type & impl_desc_type::_type) == impl_desc_type::_type) \ + add_type(#_type) + + SEARCH_TYPE(undef); + SEARCH_TYPE(jit); + SEARCH_TYPE(ref); + + SEARCH_TYPE(avx512); + SEARCH_TYPE(avx2); + SEARCH_TYPE(sse42); + SEARCH_TYPE(any); + +#undef SEARCH_TYPE + + if (type == impl_desc_type::unknown) + str_type = "unknown"; + else if (str_type.empty()) + str_type = "undef"; + + if (selectedPrimitiveDesc) { + if (selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision().name()); + } else { + str_type += "_I8"; + } + } + + return str_type; +} + +bool RandomUniform::needShapeInfer() const { + return !m_const_inputs[SHAPE]; +} + +bool RandomUniform::isExecutable() const { + return !isInputTensorAtPortEmpty(SHAPE); +} + +bool RandomUniform::created() const { + return getType() == Type::RandomUniform; +} + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/random_uniform.hpp b/src/plugins/intel_cpu/src/nodes/random_uniform.hpp new file mode 100644 index 00000000000000..ecbfebdf5d79c6 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/random_uniform.hpp @@ -0,0 +1,120 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "kernels/x64/random_uniform.hpp" + +namespace ov { +namespace intel_cpu { +namespace node { + +class RandomUniform : public Node { +public: + union OutputType { + float f32; + float16 f16; + bfloat16 bf16; + double f64; + int32_t i32; + uint32_t u32; + uint16_t u16; + int64_t i64; + }; + + RandomUniform(const std::shared_ptr& op, const GraphContext::CPtr& context); + + void getSupportedDescriptors() override; + + void initSupportedPrimitiveDescriptors() override; + + bool needPrepareParams() const override; + + void prepareParams() override; + + void execute(dnnl::stream strm) override; + + void executeDynamicImpl(dnnl::stream strm) override; + + bool isExecutable() const override; + + void createPrimitive() override; + + bool created() const override; + + bool canBeInPlace() const override { return false; } + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + + std::string getPrimitiveDescriptorType() const override; + +protected: + bool needShapeInfer() const override; + +private: + void computeStl(void* out, size_t work_amount); + + std::pair computePhilox(void* out, size_t work_amount, const std::pair& prev_state); + + template + void generateData(DISTR_TYPE distribution, void* out, size_t work_amount); + + void initOutShape(VectorDims& dst, const void* src, const element::Type& shape_type, size_t len); + + void initEdgeValues(OutputType& dst, const void* src, const element::Type& output_type); + + void evalRange(); + + enum { SHAPE = 0, MIN_VAL, MAX_VAL }; + enum AlgoType { STL, PHILOX }; + + bool m_const_inputs[3] = {false, false, false}; + + ov::element::Type m_output_prc; + uint64_t m_global_seed = 0lu; + uint64_t m_op_seed = 0lu; + std::pair m_state {0lu, 0lu}; + + VectorDims m_out_shape = {}; + uint64_t m_out_el_num = 1lu; + OutputType m_min_val; + OutputType m_max_val; + OutputType m_range_val; + AlgoType m_algo = PHILOX; + + std::default_random_engine m_generator; + + struct ThreadParams { + uint64_t work_amount = 0lu; + uint64_t dst_shift = 0lu; + uint64_t n_shift = 0lu; + uint64_t step = 0lu; + }; + + uint64_t m_threads_num = 0lu; + std::vector m_thread_params; + + ///// PHILOX constants ///// + + // Determines how many sequence elements of RNG sequence are skipped between runs. + // Can be any positive value, 256 is chosen for parity with Tensorflow. + static constexpr uint64_t SKIP_CONST = 256lu; + + // Philox algorithm returns 4 elements of RNG sequence per each invocation + static constexpr uint64_t PHILOX_GROUP_SIZE = 4lu; + + // Output elements number threshold to execute on one thread. + static constexpr uint64_t PHILOX_PARALLEL_EXECUTION_THRESHOLD = 1000lu; + + uint64_t m_skip_count = 0lu; + ///////////////////////////////////////////////////////////////////////////////// + + std::shared_ptr m_jit_kernel; +}; + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index b42dc99b390fb4..091e31813125cf 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -2,18 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include -#include +#include "reference.h" -#include "common/blocked_desc_creator.h" #include "common/cpu_memcpy.h" +#include #include "openvino/core/shape_util.hpp" -#include "openvino/runtime/tensor.hpp" -#include "reference.h" -using namespace dnnl; using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -21,21 +15,15 @@ namespace ov { namespace intel_cpu { namespace node { -Reference::Reference(const std::shared_ptr& op, const GraphContext::CPtr context, +Reference::Reference(const std::shared_ptr& op, const GraphContext::CPtr& context, const std::string& errorMessage) : - Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ngraphOp(op), additionalErrorMessage(errorMessage) { + Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ovCoreNode(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { IE_THROW(NotImplemented) << "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"; } + setType(Type::Reference); setTypeStr("Reference"); - - // RandomUniform should generate new sequence each run even if all inputs are constants. So that method Node::IsConstant() - // doesn't return 'True' for RandomUniform with all constant inputs and the node generates new values for each inference, - // we set 'NoConst' value for 'ConstantType' in ctor - if (ov::is_type(ngraphOp)) { - constant = ConstantType::NoConst; - } } void Reference::getSupportedDescriptors() {} @@ -47,13 +35,13 @@ void Reference::initSupportedPrimitiveDescriptors() { std::vector inputConfigurators; inputConfigurators.reserve(inputShapes.size()); for (size_t i = 0; i < inputShapes.size(); i++) { - inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_input_element_type(i)), inputShapes[i]); + inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ovCoreNode->get_input_element_type(i)), inputShapes[i]); } std::vector outputConfigurators; outputConfigurators.reserve(inputShapes.size()); for (size_t i = 0; i < outputShapes.size(); i++) { - outputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_output_element_type(i)), outputShapes[i]); + outputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ovCoreNode->get_output_element_type(i)), outputShapes[i]); } addSupportedPrimDesc(inputConfigurators, outputConfigurators, impl_desc_type::ref); @@ -64,8 +52,8 @@ void Reference::createPrimitive() {} void Reference::execute(dnnl::stream strm) { auto inputs = prepareInputs(); auto outputs = prepareOutputs(); - if (!ngraphOp->evaluate(outputs, inputs)) { - IE_THROW() << "Evaluation failed on node of type: " << std::string(ngraphOp->get_type_name()) << " name: " << getName(); + if (!ovCoreNode->evaluate(outputs, inputs)) { + THROW_CPU_NODE_ERR("evaluation failed for core operation: ", std::string(ovCoreNode->get_type_name())); } } @@ -81,18 +69,16 @@ void Reference::executeDynamicImpl(dnnl::stream strm) { for (size_t i = 0; i < outputShapes.size(); ++i) { auto mem_desc = getBaseMemDescAtOutputPort(i); if (mem_desc->isDefined()) { - outputs.emplace_back(ngraphOp->get_output_element_type(i), mem_desc->getShape().getStaticDims()); + outputs.emplace_back(ovCoreNode->get_output_element_type(i), mem_desc->getShape().getStaticDims()); } else { - outputs.emplace_back(ngraphOp->get_output_element_type(i), ov::util::make_dynamic_shape()); + outputs.emplace_back(ovCoreNode->get_output_element_type(i), ov::util::make_dynamic_shape()); } } } else { - IE_THROW(Unexpected) << - "Unexpected shape infer result status during the inference of a node with type " << - getTypeStr() << " and name " << getName(); + THROW_CPU_NODE_ERR("got unexpected shape infer result status during the inference."); } - if (!ngraphOp->evaluate(outputs, inputs)) { - IE_THROW() << "Evaluation failed on node of type: " << std::string(ngraphOp->get_type_name()) << " name: " << getName(); + if (!ovCoreNode->evaluate(outputs, inputs)) { + THROW_CPU_NODE_ERR("evaluation failed for core operation: ", std::string(ovCoreNode->get_type_name())); } if (ShapeInferStatus::skip == result.status) { std::vector newOutputDims; @@ -105,8 +91,7 @@ void Reference::executeDynamicImpl(dnnl::stream strm) { auto memory = getChildEdgesAtPort(i)[0]->getMemoryPtr(); auto& tensor = outputs[i]; if (memory->getSize() != tensor.get_byte_size()) { - IE_THROW(Unexpected) << "Output tensor data size mismatch occurred during the inference of a node with type " << - getTypeStr() << " and name " << getName() << " on output port number " << i; + THROW_CPU_NODE_ERR("output tensor data size mismatch occurred during the inference on output port number ", i); } cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size()); } @@ -125,9 +110,9 @@ ov::TensorVector Reference::prepareInputs() const { ov::TensorVector inputs; for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().getData(); - ov::Shape shape = ngraphOp->get_input_partial_shape(i).rank().get_length() == 0 ? + ov::Shape shape = ovCoreNode->get_input_partial_shape(i).rank().get_length() == 0 ? ov::Shape{} : getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(); - inputs.push_back(ov::Tensor(ngraphOp->get_input_element_type(i), shape, srcDataPtr)); + inputs.push_back(ov::Tensor(ovCoreNode->get_input_element_type(i), shape, srcDataPtr)); } return inputs; } @@ -136,9 +121,9 @@ ov::TensorVector Reference::prepareOutputs() const { ov::TensorVector outputs; for (size_t i = 0; i < outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().getData(); - ov::Shape shape = ngraphOp->get_output_partial_shape(i).rank().get_length() == 0 ? + ov::Shape shape = ovCoreNode->get_output_partial_shape(i).rank().get_length() == 0 ? ov::Shape{} : getChildEdgesAtPort(i)[0]->getMemory().getStaticDims(); - outputs.push_back(ov::Tensor(ngraphOp->get_output_element_type(i), shape, dstDataPtr)); + outputs.push_back(ov::Tensor(ovCoreNode->get_output_element_type(i), shape, dstDataPtr)); } return outputs; } diff --git a/src/plugins/intel_cpu/src/nodes/reference.h b/src/plugins/intel_cpu/src/nodes/reference.h index 4c2a8a1310806f..c2453835229138 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.h +++ b/src/plugins/intel_cpu/src/nodes/reference.h @@ -12,7 +12,7 @@ namespace node { class Reference : public Node { public: - Reference(const std::shared_ptr& op, const GraphContext::CPtr context, const std::string& errorMessage); + Reference(const std::shared_ptr& op, const GraphContext::CPtr& context, const std::string& errorMessage); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -29,7 +29,7 @@ class Reference : public Node { ov::TensorVector prepareOutputs() const; private: - const std::shared_ptr ngraphOp; + const std::shared_ptr ovCoreNode; const std::string additionalErrorMessage; }; diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 9992f0f392b893..f453b7a5a51e0b 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -133,6 +133,7 @@ inline bool haveAttention(const dnnl::algorithm& alg) { const std::map RNN::weightsByinputDataType { // layer data type weights data type {memory::data_type::f32, memory::data_type::f32}, + {memory::data_type::f16, memory::data_type::f16}, {memory::data_type::bf16, memory::data_type::bf16}, {memory::data_type::u8, memory::data_type::s8}, {memory::data_type::s8, memory::data_type::s8}, @@ -505,6 +506,10 @@ void RNN::configurePortDataTypes() { if (one_of(memory::data_type::bf16, inDataTypes[xIdx], inDataTypes[hIdx])) inDataTypes[xIdx] = outDataTypes[yIdx] = outDataTypes[hoIdx] = inDataTypes[hIdx] = memory::data_type::bf16; // required by oneDNN. + if (one_of(memory::data_type::f16, inDataTypes[xIdx], inDataTypes[hIdx])) + // onednn doesn't have fp16 instance + inDataTypes[xIdx] = outDataTypes[yIdx] = outDataTypes[hoIdx] = inDataTypes[hIdx] = memory::data_type::f32; // required by oneDNN. + if (outDataTypes[yIdx] == memory::data_type::bf16 && one_of(inDataTypes[xIdx], memory::data_type::s8, memory::data_type::u8)) outDataTypes[yIdx] = memory::data_type::f32; // oneDNN does not support bf16 output precision for quantized rnn primitive yet } @@ -882,7 +887,7 @@ void RNN::copyWeightsData() { } const auto& dataType = inDataTypes[xIdx]; - if (dataType == memory::data_type::bf16) { + if (one_of(dataType, memory::data_type::bf16, memory::data_type::f16)) { fillWeights(gate_map, wIdx, rIdx); } else if (dataType == memory::data_type::f32) { // WA To avoid different weights layer and iter formats in FP32 case diff --git a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp index 3afe8aaa32c1d9..7add05741f04e1 100644 --- a/src/plugins/intel_cpu/src/nodes_factory.cpp +++ b/src/plugins/intel_cpu/src/nodes_factory.cpp @@ -79,6 +79,7 @@ #include "nodes/experimental_detectron_generate_proposals_single_image.h" #include "nodes/generate_proposals.h" #include "nodes/embedding_bag_packed_sum.h" +#include "nodes/random_uniform.hpp" #include "nodes/reduce.h" #include "nodes/if.h" #include "nodes/ctc_greedy_decoder.h" @@ -180,6 +181,7 @@ Node::NodesFactory::NodesFactory() INTEL_CPU_NODE(Unique, Type::Unique); INTEL_CPU_NODE(Ngram, Type::Ngram); INTEL_CPU_NODE(Interpolate, Type::Interpolate); + INTEL_CPU_NODE(RandomUniform, Type::RandomUniform); INTEL_CPU_NODE(Reduce, Type::Reduce); INTEL_CPU_NODE(Gather, Type::Gather); INTEL_CPU_NODE(NonMaxSuppression, Type::NonMaxSuppression); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index ddf14ef59a7eab..96be8734ec0dce 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -38,6 +38,11 @@ #include #include +#if defined(OV_CPU_WITH_ACL) +#include "nodes/executors/acl/acl_ie_scheduler.hpp" +#include "arm_compute/runtime/CPP/CPPScheduler.h" +#endif + using namespace InferenceEngine; #define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: " @@ -137,11 +142,44 @@ class CPUSpecialSetup { }; #endif // __linux__ +#if defined(OV_CPU_WITH_ACL) +std::mutex Engine::SchedulerGuard::mutex; +std::weak_ptr Engine::SchedulerGuard::ptr; + +Engine::SchedulerGuard::SchedulerGuard() { +#if IE_THREAD == IE_THREAD_SEQ + // To save state for ACL cores in single-thread mode + arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST); +#else + arm_compute::Scheduler::set(std::make_shared()); +#endif +} + +std::shared_ptr Engine::SchedulerGuard::instance() { + std::lock_guard lock{SchedulerGuard::mutex}; + auto scheduler_guard_ptr = SchedulerGuard::ptr.lock(); + if (scheduler_guard_ptr == nullptr) { + SchedulerGuard::ptr = scheduler_guard_ptr = std::make_shared(); + } + return scheduler_guard_ptr; +} + +Engine::SchedulerGuard::~SchedulerGuard() { + // To save the state of scheduler after ACLScheduler has been executed + // TODO: find out the cause of the state + std::lock_guard lock{this->dest_mutex}; + arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST); +} +#endif + Engine::Engine() : deviceFullName(getDeviceFullName()), specialSetup(new CPUSpecialSetup) { _pluginName = "CPU"; extensionManager->AddExtension(std::make_shared()); +#if defined(OV_CPU_WITH_ACL) + scheduler_guard = SchedulerGuard::instance(); +#endif } Engine::~Engine() { diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 20c6d315a2c623..3e9d616dcec02c 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -63,6 +63,20 @@ class Engine : public InferenceEngine::IInferencePlugin { const std::string deviceFullName; std::shared_ptr specialSetup; + +#if defined(OV_CPU_WITH_ACL) + struct SchedulerGuard { + SchedulerGuard(); + ~SchedulerGuard(); + static std::shared_ptr instance(); + static std::mutex mutex; + // separate mutex for saving ACLScheduler state in destructor + mutable std::mutex dest_mutex; + static std::weak_ptr ptr; + }; + + std::shared_ptr scheduler_guard; +#endif }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp new file mode 100644 index 00000000000000..cca3c74cce86b0 --- /dev/null +++ b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" +#include + +namespace ov { +namespace intel_cpu { +namespace node { + +// TODO: remove after fixing the issue 123011 +IShapeInfer::Result RandomUniformShapeInfer::infer( + const std::vector>& input_shapes, + const std::unordered_map& data_dependency) { + VectorDims dims; + const auto& mem = data_dependency.at(0); + const auto rank = mem->getShape().getElementsCount(); + auto shape_prc = mem->getDesc().getPrecision(); + switch (shape_prc) { + case InferenceEngine::Precision::I32: { + auto data = reinterpret_cast(mem->getData()); + dims.assign(data, data + rank); + } break; + case InferenceEngine::Precision::I64: { + auto data = reinterpret_cast(mem->getData()); + dims.assign(data, data + rank); + } break; + default: + OPENVINO_THROW("Unexpected Shape input precision: ", shape_prc); + } + + return {{dims}, ShapeInferStatus::success}; +} + +RandomUniformShapeInferFactory::RandomUniformShapeInferFactory(const std::shared_ptr& op) : m_op(op) { + OPENVINO_ASSERT(ov::is_type(m_op), + "Unexpected op type in RandomUniform shape inference factory: ", m_op->get_type_name()); +} + +ShapeInferPtr RandomUniformShapeInferFactory::makeShapeInfer() const { + return std::make_shared(); +} + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp new file mode 100644 index 00000000000000..ce87a966a9cbc9 --- /dev/null +++ b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shape_inference/shape_inference_cpu.hpp" +#include + +#pragma once + +namespace ov { +namespace intel_cpu { +namespace node { + +class RandomUniformShapeInfer : public ShapeInferEmptyPads { +public: + explicit RandomUniformShapeInfer() {} + IShapeInfer::Result infer( + const std::vector>& input_shapes, + const std::unordered_map& data_dependency) override; + + port_mask_t get_port_mask() const override { + return PortMask(0); + } +}; + +class RandomUniformShapeInferFactory : public ShapeInferFactory { +public: + explicit RandomUniformShapeInferFactory(const std::shared_ptr& op); + ShapeInferPtr makeShapeInfer() const override; + +private: + std::shared_ptr m_op; +}; + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 28756ba21664e1..f87bfb4f1b055f 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -195,14 +195,6 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo); CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs); - CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression); - CPU_SET_CALLBACK_COMMON(manager, - [](const_node_ptr &node) -> bool { - const auto outputs = node->get_output_target_inputs(0); - return outputs.size() != 1 || !is_type(outputs.begin()->get_node()); - }, - ov::pass::KeepConstAndDecompression); - const bool useLpt = !defaultPrecisions.empty(); if (useLpt) { CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions); @@ -233,17 +225,23 @@ void Transformations::PreLpt(const std::vector& defaultPrecis return false; } // TODO: Uncomment when group decompression is supported - // else if (ov::is_type(consumer)) { + // if (ov::is_type(consumer)) { // consumer = get_single_consumer(consumer); // if (consumer != nullptr && ov::is_type(consumer)) { // return false; // } // } + if (ov::is_type(consumer)) { + consumer = get_single_consumer(consumer); + if (consumer != nullptr && ov::is_type(consumer)) { + return false; + } + } return true; }, ov::pass::MarkDequantizationSubgraph); } - auto get_convert_precisions = []() { + auto get_convert_precisions = [&]() { precisions_map map = { {ov::element::i64, ov::element::i32}, {ov::element::u64, ov::element::i32}, @@ -251,7 +249,6 @@ void Transformations::PreLpt(const std::vector& defaultPrecis {ov::element::u16, ov::element::i32}, {ov::element::u32, ov::element::i32}, {ov::element::f64, ov::element::f32}, - {ov::element::f16, ov::element::f32}, {ov::element::boolean, ov::element::u8}, {ov::element::i4, ov::element::i8}, {ov::element::u4, ov::element::u8} @@ -259,12 +256,37 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) map.insert({ov::element::bf16, ov::element::f32}); - +#if defined(OV_CPU_ARM_ENABLE_FP16) + if (inferencePrecision != ov::element::f16) + map.insert({ov::element::f16, ov::element::f32}); +#else + map.insert({ov::element::f16, ov::element::f32}); +#endif return map; }; static const auto precisions = get_convert_precisions(); type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}}; +#if defined(OV_CPU_ARM_ENABLE_FP16) + if (inferencePrecision == ov::element::f16) { + precisions_map fp_convert_precision_map = { + {ov::element::f32, ov::element::f16} + }; + type_to_fuse_map empty_fuse_map = {}; + const bool keep_precision_sensitive_in_fp32 = true; + CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32); + } +#endif + CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression); + CPU_SET_CALLBACK_COMMON(manager, + [](const_node_ptr &node) -> bool { + const auto outputs = node->get_output_target_inputs(0); + return outputs.size() != 1 || !is_type(outputs.begin()->get_node()); + }, + ov::pass::KeepConstAndDecompression); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion); CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations); CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes); diff --git a/src/plugins/intel_cpu/tests/functional/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/behavior/ov_plugin/properties.cpp index 0e959ab865a6ed..10c0a244fcca31 100644 --- a/src/plugins/intel_cpu/tests/functional/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/behavior/ov_plugin/properties.cpp @@ -173,13 +173,18 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) { ASSERT_EQ(false, value); } +#if defined(OV_CPU_ARM_ENABLE_FP16) + const auto expected_precision_for_performance_mode = ov::element::f16; +#else + const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32; +#endif + TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) { ov::Core ie; auto value = ov::element::f32; - const auto precision = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32; ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::inference_precision)); - ASSERT_EQ(precision, value); + ASSERT_EQ(expected_precision_for_performance_mode, value); const auto forcedPrecision = ov::element::f32; @@ -210,8 +215,6 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigEnableProfiling) { ASSERT_EQ(enableProfiling, value); } -const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32; - const auto bf16_if_can_be_emulated = InferenceEngine::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32; using ExpectedModeAndType = std::pair; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/infer_request_dynamic.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/infer_request_dynamic.cpp index fa66f4a2c7801d..9cbe69255db0e0 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/infer_request_dynamic.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/infer_request_dynamic.cpp @@ -11,11 +11,12 @@ using namespace ov::test::behavior; namespace { const std::vector configs = { - {} + {{ov::hint::inference_precision.name(), ov::element::f32}} }; const std::vector HeteroConfigs = { - {ov::device::priorities(ov::test::utils::DEVICE_CPU)} + {{ov::hint::inference_precision.name(), ov::element::f32}, + {ov::device::priorities(ov::test::utils::DEVICE_CPU)}}, }; std::shared_ptr getFunction1() { diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp index 2c8678165426b3..567c877be8e8ab 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp @@ -10,11 +10,12 @@ using namespace ov::test::behavior; namespace { const std::vector configs = { - {} + {{ov::hint::inference_precision.name(), ov::element::f32}} }; const std::vector HeteroConfigs = { - {ov::device::priorities(ov::test::utils::DEVICE_CPU)} + {{ov::hint::inference_precision.name(), ov::element::f32}, + {ov::device::priorities(ov::test::utils::DEVICE_CPU)}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVIterationChaining, diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/pooling.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/pooling.cpp index c57694a072f9e7..6d4b6a72e5819a 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/pooling.cpp @@ -4,209 +4,184 @@ #include -#include "single_layer_tests/pooling.hpp" +#include "single_op_tests/pooling.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -// Common params -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, -// InferenceEngine::Precision::FP16, // "[NOT_IMPLEMENTED] Input image format FP16 is not supported yet... - InferenceEngine::Precision::U8, -// InferenceEngine::Precision::I8 // Too much cases -}; +using ov::test::PoolingLayerTest; +using ov::test::MaxPoolingV8LayerTest; +using ov::test::utils::PoolingTypes; +using ov::test::poolSpecificParams; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP16 +const std::vector model_types = { + ov::element::f16 }; const std::vector> kernels = {{3, 3}, {3, 5}}; -const std::vector> kernel3D = {{2, 2, 2}}; +const std::vector> kernel_3d = {{2, 2, 2}}; const std::vector> strides = {{1, 1}, {1, 2}, {2, 1}, {2, 2}}; -const std::vector> strides3D = {{1, 1, 1}, - {2, 2, 2}}; -const std::vector> stridess3D = {{2, 2, 2}}; -const std::vector> padBegins = {{0, 0}, - {0, 2}}; -const std::vector> padBegins3D = {{0, 0, 0}}; -const std::vector> padEnds = {{0, 0}, - {0, 2}}; -const std::vector> padEnds3D = {{0, 0, 0}}; -const std::vector roundingTypes = {ngraph::op::RoundingType::CEIL, - ngraph::op::RoundingType::FLOOR}; + +const std::vector> strides_3d = {{1, 1, 1}, + {2, 2, 2}}; + +const std::vector> pad_begins = {{0, 0}, + {0, 2}}; + +const std::vector> pad_begins_3d = {{0, 0, 0}}; + +const std::vector> pad_ends = {{0, 0}, + {0, 2}}; + +const std::vector> pad_ends_3d = {{0, 0, 0}}; + ////* ========== Max Polling ========== */ /* +========== Explicit Pad Floor Rounding ========== */ +std::vector input_shapes_static = {{1, 3, 30, 30}}; + const auto maxPool_ExplicitPad_FloorRounding_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), + ::testing::Values(PoolingTypes::MAX), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_FloorRounding, PoolingLayerTest, ::testing::Combine( maxPool_ExplicitPad_FloorRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* +========== Same Upper Pad Floor Rounding ========== */ const auto maxPool_SameUpperPad_FloorRounding_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), + ::testing::Values(PoolingTypes::MAX), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_UPPER), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_UPPER), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_SameUpperPad_FloorRounding, PoolingLayerTest, ::testing::Combine( maxPool_SameUpperPad_FloorRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* +========== Same Lower Pad Floor Rounding ========== */ const auto maxPool_SameLowerPad_FloorRounding_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), + ::testing::Values(PoolingTypes::MAX), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_LOWER), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_LOWER), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_SameLowerPad_FloorRounding, PoolingLayerTest, ::testing::Combine( maxPool_SameUpperPad_FloorRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Explicit Pad Floor Rounding 5D input========== */ + +std::vector input_shapes_5d_static = {{32, 32, 2, 2, 2}}; + const auto maxPool_ExplicitPad_FloorRounding_5Dinput_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(PoolingTypes::MAX), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_FloorRounding_5Dinput, PoolingLayerTest, ::testing::Combine( maxPool_ExplicitPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Same Upper Pad Floor Rounding 5D input========== */ const auto maxPool_SameUpperPad_FloorRounding_5Dinput_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_UPPER), + ::testing::Values(PoolingTypes::MAX), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_UPPER), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_SameUpperPad_FloorRounding_5Dinput, PoolingLayerTest, ::testing::Combine( maxPool_SameUpperPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Same Lower Pad Ceil Rounding 5D input========== */ const auto maxPool_SameLowerPad_CeilRounding_5Dinput_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::op::RoundingType::CEIL), - ::testing::Values(ngraph::op::PadType::SAME_LOWER), + ::testing::Values(PoolingTypes::MAX), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::op::RoundingType::CEIL), + ::testing::Values(ov::op::PadType::SAME_LOWER), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_SameLowerPad_CeilRounding_5Dinput, PoolingLayerTest, ::testing::Combine( maxPool_SameUpperPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Explicit Pad Ceil Rounding ========== */ const auto maxPool_ExplicitPad_CeilRounding_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX), + ::testing::Values(PoolingTypes::MAX), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::op::RoundingType::CEIL), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::op::RoundingType::CEIL), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_CeilRounding, PoolingLayerTest, ::testing::Combine( maxPool_ExplicitPad_CeilRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); @@ -214,55 +189,49 @@ INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_CeilRounding, PoolingLayerTes ////* ========== Avg Pooling ========== */ /* +========== Explicit Pad Ceil Rounding ========== */ const auto avgPoolExplicitPadCeilRoundingParams = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::AVG), + ::testing::Values(PoolingTypes::AVG), ::testing::ValuesIn(kernels), - // TODO: Non 1 strides fails in ngraph reference implementation with error "The end corner is out of bounds at axis 3" thrown in the test body. + // TODO: Non 1 strides fails in reference implementation with error "The end corner is out of bounds at axis 3" thrown in the test body. ::testing::ValuesIn(strides), ::testing::ValuesIn(std::vector>({{0, 0}, {1, 1}, {0, 1}})), ::testing::ValuesIn(std::vector>({{0, 0}, {1, 1}, {0, 1}})), - ::testing::Values(ngraph::op::RoundingType::CEIL), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::RoundingType::CEIL), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::Values(true, false) ); INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_ExplicitPad_CeilRounding, PoolingLayerTest, ::testing::Combine( avgPoolExplicitPadCeilRoundingParams, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); -std::vector psParams({poolSpecificParams(ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false), - poolSpecificParams(ngraph::helpers::PoolingTypes::AVG, {7, 7}, {1, 1}, {0, 0}, {1, 1}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false)}); +std::vector psParams({poolSpecificParams(PoolingTypes::AVG, {2, 2}, {2, 2}, {0, 0}, {0, 0}, + ov::op::RoundingType::CEIL, ov::op::PadType::EXPLICIT, false), + poolSpecificParams(PoolingTypes::AVG, {7, 7}, {1, 1}, {0, 0}, {1, 1}, + ov::op::RoundingType::CEIL, ov::op::PadType::EXPLICIT, false)}); + +std::vector input_shapes_explicit_pad_ceil_rounding_corner_static = {{1, 3, 30, 30}}; INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_ExplicitPad_CeilRounding_corner, PoolingLayerTest, ::testing::Combine( ::testing::ValuesIn(psParams), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 1024, 6, 6})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_explicit_pad_ceil_rounding_corner_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* +========== Explicit Pad Floor Rounding ========== */ const auto avgPoolExplicitPadFloorRoundingParams = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::AVG), + ::testing::Values(PoolingTypes::AVG), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::ValuesIn(std::vector>({{0, 0}, {1, 1}})), ::testing::ValuesIn(std::vector>({{0, 0}, {1, 1}})), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::Values(true, false) ); @@ -270,114 +239,96 @@ const auto avgPoolExplicitPadFloorRoundingParams = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_ExplicitPad_FloorRounding, PoolingLayerTest, ::testing::Combine( avgPoolExplicitPadFloorRoundingParams, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Explicit Pad Floor Rounding 5D input========== */ const auto avgPool_ExplicitPad_FloorRounding_5Dinput_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::AVG), - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(PoolingTypes::AVG), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::Values(true, false) ); +std::vector input_shapes_5d_2_static = {{32, 32, 2, 2, 4}}; + INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_ExplicitPad_FloorRounding_5Dinput, PoolingLayerTest, ::testing::Combine( avgPool_ExplicitPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 4})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_2_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Same Upper Pad Floor Rounding 5D input========== */ const auto avgPool_SameUpperPad_FloorRounding_5Dinput_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::AVG), - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_UPPER), + ::testing::Values(PoolingTypes::AVG), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_UPPER), ::testing::Values(true) ); INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_SameUpperPad_FloorRounding_5Dinput, PoolingLayerTest, ::testing::Combine( avgPool_SameUpperPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 4})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_2_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); /* ========== Same Lower Pad Ceil Rounding 5D input========== */ const auto avgPool_SameLowerPad_CeilRounding_5Dinput_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::AVG), - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::op::RoundingType::CEIL), - ::testing::Values(ngraph::op::PadType::SAME_LOWER), + ::testing::Values(PoolingTypes::AVG), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::op::RoundingType::CEIL), + ::testing::Values(ov::op::PadType::SAME_LOWER), ::testing::Values(true) ); INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_SameLowerPad_CeilRounding_5Dinput, PoolingLayerTest, ::testing::Combine( avgPool_SameLowerPad_CeilRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); ////* ========== Max Pooling V8 ========== */ const std::vector> dilation = {{1, 1}, {2, 2}}; -const std::vector> dilation3D = {{1, 1, 1}, {2, 2, 2}}; +const std::vector> dilation_3d = {{1, 1, 1}, {2, 2, 2}}; /* ========== Explicit Pad Floor Rounding ========== */ const auto maxPoolv8_ExplicitPad_FloorRounding_Params = ::testing::Combine( ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::ValuesIn(dilation), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::EXPLICIT) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_ExplicitPad_FloorRounding, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_ExplicitPad_FloorRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); @@ -386,23 +337,19 @@ const auto maxPoolv8_SameUpperPad_FloorRounding_Params = ::testing::Combine( ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::ValuesIn(dilation), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_UPPER) + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_UPPER) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolv8_SameUpperPad_FloorRounding, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_SameUpperPad_FloorRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); @@ -411,98 +358,82 @@ const auto maxPoolv8_SameLowerPad_FloorRounding_Params = ::testing::Combine( ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::ValuesIn(dilation), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_LOWER) + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_LOWER) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolv8_SameLowerPad_FloorRounding, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_SameLowerPad_FloorRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); /* ========= Explicit Pad Floor Rounding 5D input========== */ const auto maxPoolv8_ExplicitPad_FloorRounding_5Dinput_Params = ::testing::Combine( - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::Values(dilation3D[0]), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::Values(dilation_3d[0]), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::EXPLICIT) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolv8_ExplicitPad_FloorRounding_5Dinput, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_ExplicitPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); /* ========= Same Upper Pad Floor Rounding 5D input========== */ const auto maxPoolv8_SameUpperPad_FloorRounding_5Dinput_Params = ::testing::Combine( - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(dilation3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(dilation_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::FLOOR), - ::testing::Values(ngraph::op::PadType::SAME_UPPER) + ::testing::Values(ov::op::RoundingType::FLOOR), + ::testing::Values(ov::op::PadType::SAME_UPPER) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolv8_SameUpperPad_FloorRounding_5Dinput, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_SameUpperPad_FloorRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); /* ========= Same Lower Pad Ceil Rounding 5D input========== */ const auto maxPoolv8_SameLowerPad_CeilRounding_5Dinput_Params = ::testing::Combine( - ::testing::ValuesIn(kernel3D), - ::testing::ValuesIn(strides3D), - ::testing::ValuesIn(dilation3D), - ::testing::ValuesIn(padBegins3D), - ::testing::ValuesIn(padEnds3D), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(kernel_3d), + ::testing::ValuesIn(strides_3d), + ::testing::ValuesIn(dilation_3d), + ::testing::ValuesIn(pad_begins_3d), + ::testing::ValuesIn(pad_ends_3d), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::CEIL), - ::testing::Values(ngraph::op::PadType::SAME_LOWER) + ::testing::Values(ov::op::RoundingType::CEIL), + ::testing::Values(ov::op::PadType::SAME_LOWER) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolv8_SameLowerPad_CeilRounding_5Dinput, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_SameLowerPad_CeilRounding_5Dinput_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({32, 32, 2, 2, 2})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); @@ -511,49 +442,41 @@ const auto maxPoolv8_ExplicitPad_CeilRounding_Params = ::testing::Combine( ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::ValuesIn(dilation), - ::testing::ValuesIn(padBegins), - ::testing::ValuesIn(padEnds), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::ValuesIn(pad_begins), + ::testing::ValuesIn(pad_ends), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::CEIL), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::RoundingType::CEIL), + ::testing::Values(ov::op::PadType::EXPLICIT) ); INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolv8_ExplicitPad_CeilRounding, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_ExplicitPad_CeilRounding_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); ////* ========== Avg and Max Polling Cases ========== */ /* ========== Valid Pad Rounding Not Applicable ========== */ const auto allPools_ValidPad_Params = ::testing::Combine( - ::testing::Values(ngraph::helpers::PoolingTypes::MAX, ngraph::helpers::PoolingTypes::AVG), + ::testing::Values(PoolingTypes::MAX, PoolingTypes::AVG), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::Values(std::vector({0, 0})), ::testing::Values(std::vector({0, 0})), ::testing::Values( - ngraph::op::RoundingType::FLOOR), // placeholder value - Rounding Type not applicable for Valid pad type - ::testing::Values(ngraph::op::PadType::VALID), + ov::op::RoundingType::FLOOR), // placeholder value - Rounding Type not applicable for Valid pad type + ::testing::Values(ov::op::PadType::VALID), ::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling ); INSTANTIATE_TEST_SUITE_P(smoke_MAX_and_AVGPool_ValidPad, PoolingLayerTest, ::testing::Combine( allPools_ValidPad_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), PoolingLayerTest::getTestCaseName); @@ -563,21 +486,17 @@ const auto maxPoolv8_ValidPad_Params = ::testing::Combine( ::testing::ValuesIn(dilation), ::testing::Values(std::vector({0, 0})), ::testing::Values(std::vector({0, 0})), - ::testing::Values(ngraph::element::Type_t::i32), + ::testing::Values(ov::element::i32), ::testing::Values(0), - ::testing::Values(ngraph::op::RoundingType::FLOOR), // placeholder value - Rounding Type not applicable for Valid pad type - ::testing::Values(ngraph::op::PadType::VALID) + ::testing::Values(ov::op::RoundingType::FLOOR), // placeholder value - Rounding Type not applicable for Valid pad type + ::testing::Values(ov::op::PadType::VALID) ); INSTANTIATE_TEST_SUITE_P(smoke_MAXPoolv8_ValidPad, MaxPoolingV8LayerTest, ::testing::Combine( maxPoolv8_ValidPad_Params, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), MaxPoolingV8LayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp index 7009da247635e1..58eaa5174ee107 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp @@ -4,30 +4,32 @@ #include -#include "single_layer_tests/roi_align.hpp" +#include "single_op_tests/roi_align.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ROIAlignLayerTest; +namespace { -const std::vector netPRCs = { - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32 +const std::vector model_types = { + ov::element::f16, + ov::element::f32 }; const auto ROIAlignCases_average = ::testing::Combine( ::testing::ValuesIn( - std::vector> { - { 3, 8, 16, 16 }, - { 2, 1, 16, 16 }, - { 2, 1, 8, 16 }}), - ::testing::Values(std::vector{ 2, 4 }), + ov::test::static_shapes_to_test_representation( + std::vector>{ + {{ 3, 8, 16, 16 }}, + {{ 2, 1, 16, 16 }}, + {{ 2, 1, 8, 16 }}})), + ::testing::Values(ov::Shape{ 2, 4 }), ::testing::Values(2), ::testing::Values(2), ::testing::ValuesIn(std::vector { 1, 0.625 }), ::testing::Values(2), ::testing::Values("avg"), - ::testing::ValuesIn(netPRCs), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU) ); @@ -35,18 +37,21 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_average, ROIAlignLayerTest, ROIAlig const auto ROIAlignCases_max = ::testing::Combine( ::testing::ValuesIn( - std::vector> { - { 2, 8, 20, 20 }, - { 2, 1, 20, 20 }, - { 2, 1, 10, 20 }}), - ::testing::Values(std::vector{ 2, 4 }), + ov::test::static_shapes_to_test_representation( + std::vector>{ + {{ 2, 8, 20, 20 }}, + {{ 2, 1, 20, 20 }}, + {{ 2, 1, 10, 20 }}})), + ::testing::Values(ov::Shape{ 2, 4 }), ::testing::Values(2), ::testing::Values(2), ::testing::ValuesIn(std::vector { 1, 0.625 }), ::testing::Values(2), ::testing::Values("max"), - ::testing::ValuesIn(netPRCs), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU) ); INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max, ROIAlignLayerTest, ROIAlignCases_max, ROIAlignLayerTest::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_pooling.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_pooling.cpp index ff2be946948c6a..ca68bc6fa4d5ea 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/roi_pooling.cpp @@ -4,62 +4,72 @@ #include -#include "single_layer_tests/roi_pooling.hpp" +#include "single_op_tests/roi_pooling.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ROIPoolingLayerTest; -const std::vector> inShapes = { - {1, 3, 8, 8}, - {3, 4, 50, 50} +namespace { + +const std::vector param_shapes = { + {{1, 3, 8, 8}}, + {{3, 4, 50, 50}} }; -const std::vector> pooledShapes_max = { - {1, 1}, - {2, 2}, - {3, 3}, - {6, 6} +const std::vector coord_shapes = { + {{1, 5}}, + {{3, 5}}, + {{5, 5}} }; -const std::vector> pooledShapes_bilinear = { - {1, 1}, - {2, 2}, - {3, 3}, - {6, 6} +auto input_shapes = [](const std::vector& in1, const std::vector& in2) { + std::vector> res; + for (const auto& sh1 : in1) + for (const auto& sh2 : in2) + res.push_back(ov::test::static_shapes_to_test_representation({sh1, sh2})); + return res; +}(param_shapes, coord_shapes); + +const std::vector pooled_shapes_max = { + {{1, 1}}, + {{2, 2}}, + {{3, 3}}, + {{6, 6}} }; -const std::vector> coordShapes = { - {1, 5}, - {3, 5}, - {5, 5} +const std::vector pooled_shapes_bilinear = { + {{1, 1}}, + {{2, 2}}, + {{3, 3}}, + {{6, 6}} }; -const std::vector netPRCs = { - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32 +const std::vector model_types = { + ov::element::f16, + ov::element::f32 }; const std::vector spatial_scales = {0.625f, 1.f}; const auto test_ROIPooling_max = ::testing::Combine( - ::testing::ValuesIn(inShapes), - ::testing::ValuesIn(coordShapes), - ::testing::ValuesIn(pooledShapes_max), + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(pooled_shapes_max), ::testing::ValuesIn(spatial_scales), - ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_MAX), - ::testing::ValuesIn(netPRCs), + ::testing::Values(ov::test::utils::ROIPoolingTypes::ROI_MAX), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU) ); const auto test_ROIPooling_bilinear = ::testing::Combine( - ::testing::ValuesIn(inShapes), - ::testing::ValuesIn(coordShapes), - ::testing::ValuesIn(pooledShapes_bilinear), + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(pooled_shapes_bilinear), ::testing::Values(spatial_scales[1]), - ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR), - ::testing::ValuesIn(netPRCs), + ::testing::Values(ov::test::utils::ROIPoolingTypes::ROI_BILINEAR), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU) ); INSTANTIATE_TEST_SUITE_P(smoke_TestsROIPooling_max, ROIPoolingLayerTest, test_ROIPooling_max, ROIPoolingLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsROIPooling_bilinear, ROIPoolingLayerTest, test_ROIPooling_bilinear, ROIPoolingLayerTest::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index e629a715b69890..4eb40365fa95d7 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -24,8 +24,8 @@ std::vector disabledTestPatterns() { R"(.*(QuantGroupConv3D).*)", // TODO: Issue: 34518 R"(.*RangeLayerTest.*)", - R"(.*(RangeAddSubgraphTest).*Start=1.2.*Stop=(5.2|-5.2).*Step=(0.1|-0.1).*netPRC=FP16.*)", - R"(.*(RangeNumpyAddSubgraphTest).*netPRC=FP16.*)", + R"(.*(RangeAddSubgraphTest).*Start=1.2.*Stop=(5.2|-5.2).*Step=(0.1|-0.1).*ET=f16.*)", + R"(.*(RangeNumpyAddSubgraphTest).*ET=f16.*)", // TODO: Issue: 43793 R"(.*InferRequestPreprocessDynamicallyInSetBlobTest.*iPRC=0.*_iLT=1.*)", R"(.*InferRequestPreprocessDynamicallyInSetBlobTest.*oPRC=0.*_oLT=1.*)", @@ -189,16 +189,9 @@ std::vector disabledTestPatterns() { R"(smoke_LSTMSequenceCommon.*LSTMSequenceTest.Inference.*CONVERT_TO_TI.*)", // Issue: 122094 R"(smoke_Interpolate_Basic_Down_Sample_Tail/InterpolateLayerTest.Inference.*(asymmetric|align_corners).*f16.*)", + // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling. + R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", }; -#if defined(__APPLE__) && defined(OPENVINO_ARCH_ARM64) - // Issue: 120950 - retVector.emplace_back(R"(.*smoke_TensorIteratorCommon/TensorIteratorTest.Inference.*_modelType=f16_targetDevice=CPU.*)"); - retVector.emplace_back(R"(.*smoke_CtcGreedyDecoderBasic/CTCGreedyDecoderLayerTest.Inference.*netPRC=f16.*trgDev=CPU.*)"); - retVector.emplace_back(R"(.*CTCGreedyDecoderSeqLenLayerTest.Inference.*dataPRC=f16.*trgDev=CPU.*)"); - // Issue: 122177 - retVector.emplace_back(R"(.*smoke_LSTMCellCommon/LSTMCellTest.Inference.*_modelType=f16.*)"); - retVector.emplace_back(R"(.*smoke_LSTMSequenceCommonZeroClip/LSTMSequenceTest.Inference.*_modelType=f16.*)"); -#endif #if defined(OPENVINO_ARCH_X86) retVector.emplace_back(R"(.*DetectionOutputLayerTest.*)"); @@ -227,6 +220,18 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)"); // int8 specific retVector.emplace_back(R"(smoke_Quantized.*)"); + +#if defined(OV_CPU_ARM_ENABLE_FP16) + // Issue: 123019 + retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding.*modelType=f16.*)"); + retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_FloorRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)"); + retVector.emplace_back(R"(smoke_AvgPool_SameUpperPad_FloorRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)"); + retVector.emplace_back(R"(smoke_AvgPool_SameLowerPad_CeilRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)"); + retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); + retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); + retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); +#endif + #endif #if defined(OPENVINO_ARCH_ARM) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mul_conv_fusion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mul_conv_fusion.cpp index 480a538c6c68ad..4ab6ddfbaefdb9 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mul_conv_fusion.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mul_conv_fusion.cpp @@ -2,263 +2,268 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "subgraph_tests/mul_conv_fusion.hpp" + #include -#include "subgraph_tests/mul_conv_fusion.hpp" #include "common_test_utils/test_constants.hpp" -#include - -using namespace SubgraphTestsDefinitions; +using namespace ov::test; namespace { - const std::vector types{ngraph::element::f32}; +const std::vector types{ov::element::f32}; - const std::vector const_shapes_fprop_1d{ - {}, - {1}, - {1, 1}, - {8, 1}, - {1, 1, 1}, - {1, 8, 1}, - }; +const std::vector const_shapes_fprop_1d{ + {}, + {1}, + {1, 1}, + {8, 1}, + {1, 1, 1}, + {1, 8, 1}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_Convolution_1D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::Convolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{1, 8, 64}), - ::testing::Values(ngraph::Shape{64, 8, 1}), - ::testing::ValuesIn(const_shapes_fprop_1d), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Convolution_1D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::Convolution::get_type_info_static()), + ::testing::Values(ov::Shape{1, 8, 64}), + ::testing::Values(ov::Shape{64, 8, 1}), + ::testing::ValuesIn(const_shapes_fprop_1d), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector const_shapes_fprop_2d{ - {}, - {1}, - {1, 1}, - {1, 1, 1}, - {8, 1, 1}, - {1, 1, 1, 1}, - {1, 8, 1, 1}, - }; +const std::vector const_shapes_fprop_2d{ + {}, + {1}, + {1, 1}, + {1, 1, 1}, + {8, 1, 1}, + {1, 1, 1, 1}, + {1, 8, 1, 1}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_Convolution_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::Convolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 8, 14, 14}), - ::testing::Values(ngraph::Shape{2, 8, 7, 7}), - ::testing::ValuesIn(const_shapes_fprop_2d), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Convolution_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::Convolution::get_type_info_static()), + ::testing::Values(ov::Shape{2, 8, 14, 14}), + ::testing::Values(ov::Shape{2, 8, 7, 7}), + ::testing::ValuesIn(const_shapes_fprop_2d), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector const_shapes_fprop_2d_kernel_same_as_input{ - {7}, - {1, 7}, - {1, 1, 7}, - {8, 1, 7}, - {1, 1, 1, 7}, - {1, 8, 1, 7}, - {7, 1}, - {1, 7, 1}, - {8, 7, 1}, - {1, 1, 7, 1}, - {1, 8, 7, 1}, - {1, 1, 7, 7}, - {1, 8, 7, 7}, - }; +const std::vector const_shapes_fprop_2d_kernel_same_as_input{ + {7}, + {1, 7}, + {1, 1, 7}, + {8, 1, 7}, + {1, 1, 1, 7}, + {1, 8, 1, 7}, + {7, 1}, + {1, 7, 1}, + {8, 7, 1}, + {1, 1, 7, 1}, + {1, 8, 7, 1}, + {1, 1, 7, 7}, + {1, 8, 7, 7}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_Convolution_2D_kernel_same_as_input, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::Convolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 8, 7, 7}), - ::testing::Values(ngraph::Shape{3, 8, 7, 7}), - ::testing::ValuesIn(const_shapes_fprop_2d_kernel_same_as_input), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Convolution_2D_kernel_same_as_input, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::Convolution::get_type_info_static()), + ::testing::Values(ov::Shape{2, 8, 7, 7}), + ::testing::Values(ov::Shape{3, 8, 7, 7}), + ::testing::ValuesIn(const_shapes_fprop_2d_kernel_same_as_input), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector const_shapes_conv_bprop{ - {}, - {1}, - {1, 1}, - {1, 1, 1}, - {1, 1, 1, 1}, - {3, 1, 1}, - {1, 3, 1, 1}, - }; - INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::ConvolutionBackpropData::get_type_info_static()), - ::testing::Values(ngraph::Shape{4, 3, 64, 64}), - ::testing::Values(ngraph::Shape{3, 20, 3, 3}), - ::testing::ValuesIn(const_shapes_conv_bprop), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +const std::vector const_shapes_conv_bprop{ + {}, + {1}, + {1, 1}, + {1, 1, 1}, + {1, 1, 1, 1}, + {3, 1, 1}, + {1, 3, 1, 1}, +}; +INSTANTIATE_TEST_SUITE_P( + smoke_ConvolutionBackpropData_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::ConvolutionBackpropData::get_type_info_static()), + ::testing::Values(ov::Shape{4, 3, 64, 64}), + ::testing::Values(ov::Shape{3, 20, 3, 3}), + ::testing::ValuesIn(const_shapes_conv_bprop), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector const_shapes_group_conv{ - {}, - {1}, - {1, 1}, - {1, 1, 1}, - {12, 1, 1}, - {1, 1, 1, 1}, - {1, 12, 1, 1}, - }; +const std::vector const_shapes_group_conv{ + {}, + {1}, + {1, 1}, + {1, 1, 1}, + {12, 1, 1}, + {1, 1, 1, 1}, + {1, 12, 1, 1}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::GroupConvolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 14, 14}), - ::testing::Values(ngraph::Shape{4, 5, 3, 7, 7}), - ::testing::ValuesIn(const_shapes_group_conv), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::GroupConvolution::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 14, 14}), + ::testing::Values(ov::Shape{4, 5, 3, 7, 7}), + ::testing::ValuesIn(const_shapes_group_conv), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector const_shapes_group_conv_kernel_same_as_input{ - {14}, - {1, 14}, - {1, 1, 14}, - {12, 1, 14}, - {1, 1, 1, 14}, - {1, 12, 1, 14}, - {14, 1}, - {1, 14, 1}, - {12, 14, 1}, - {1, 1, 14, 1}, - {1, 12, 14, 1}, - {1, 1, 14, 14}, - {1, 12, 14, 14}, - }; +const std::vector const_shapes_group_conv_kernel_same_as_input{ + {14}, + {1, 14}, + {1, 1, 14}, + {12, 1, 14}, + {1, 1, 1, 14}, + {1, 12, 1, 14}, + {14, 1}, + {1, 14, 1}, + {12, 14, 1}, + {1, 1, 14, 1}, + {1, 12, 14, 1}, + {1, 1, 14, 14}, + {1, 12, 14, 14}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution_2D_kernel_same_as_input, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::GroupConvolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 14, 14}), - ::testing::Values(ngraph::Shape{4, 5, 3, 14, 14}), - ::testing::ValuesIn(const_shapes_group_conv_kernel_same_as_input), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution_2D_kernel_same_as_input, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::GroupConvolution::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 14, 14}), + ::testing::Values(ov::Shape{4, 5, 3, 14, 14}), + ::testing::ValuesIn(const_shapes_group_conv_kernel_same_as_input), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector const_shapes_group_conv_bprop{ - {}, - {1}, - {1, 1}, - {1, 1, 1}, - {12, 1, 1}, - {1, 1, 1, 1}, - {1, 12, 1, 1}, - }; +const std::vector const_shapes_group_conv_bprop{ + {}, + {1}, + {1, 1}, + {1, 1, 1}, + {12, 1, 1}, + {1, 1, 1, 1}, + {1, 12, 1, 1}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolutionBackpropData_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::GroupConvolutionBackpropData::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 10, 10}), - ::testing::Values(ngraph::Shape{4, 3, 5, 2, 2}), - ::testing::ValuesIn(const_shapes_group_conv_bprop), - ::testing::ValuesIn(types), - ::testing::Values(false), // Positive test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_GroupConvolutionBackpropData_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::GroupConvolutionBackpropData::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 10, 10}), + ::testing::Values(ov::Shape{4, 3, 5, 2, 2}), + ::testing::ValuesIn(const_shapes_group_conv_bprop), + ::testing::ValuesIn(types), + ::testing::Values(false), // Positive test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector negative_const_shapes{ - {12, 64, 64}, - {2, 1, 1, 1}, - {1, 1, 64, 64}, - {1, 12, 64, 64}, - {2, 12, 64, 64}, - }; +const std::vector negative_const_shapes{ + {12, 64, 64}, + {2, 1, 1, 1}, + {1, 1, 64, 64}, + {1, 12, 64, 64}, + {2, 12, 64, 64}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_NegativeConvolution_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::Convolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 64, 64}), - ::testing::Values(ngraph::Shape{20, 12, 1, 1}), - ::testing::ValuesIn(negative_const_shapes), - ::testing::ValuesIn(types), - ::testing::Values(true), // Negative test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_NegativeConvolution_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::Convolution::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 64, 64}), + ::testing::Values(ov::Shape{20, 12, 1, 1}), + ::testing::ValuesIn(negative_const_shapes), + ::testing::ValuesIn(types), + ::testing::Values(true), // Negative test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_NegativeConvolutionBackpropData_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::ConvolutionBackpropData::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 64, 64}), - ::testing::Values(ngraph::Shape{12, 20, 3, 3}), - ::testing::ValuesIn(negative_const_shapes), - ::testing::ValuesIn(types), - ::testing::Values(true), // Negative test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_NegativeConvolutionBackpropData_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::ConvolutionBackpropData::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 64, 64}), + ::testing::Values(ov::Shape{12, 20, 3, 3}), + ::testing::ValuesIn(negative_const_shapes), + ::testing::ValuesIn(types), + ::testing::Values(true), // Negative test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_NegativeGroupConvolution_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::GroupConvolution::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 64, 64}), - ::testing::Values(ngraph::Shape{4, 5, 3, 1, 2}), - ::testing::ValuesIn(negative_const_shapes), - ::testing::ValuesIn(types), - ::testing::Values(true), // Negative test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_NegativeGroupConvolution_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::GroupConvolution::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 64, 64}), + ::testing::Values(ov::Shape{4, 5, 3, 1, 2}), + ::testing::ValuesIn(negative_const_shapes), + ::testing::ValuesIn(types), + ::testing::Values(true), // Negative test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_NegativeGroupConvolutionBackpropData_2D, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::GroupConvolutionBackpropData::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 64, 64}), - ::testing::Values(ngraph::Shape{4, 3, 5, 1, 1}), - ::testing::ValuesIn(negative_const_shapes), - ::testing::ValuesIn(types), - ::testing::Values(true), // Negative test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_NegativeGroupConvolutionBackpropData_2D, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::GroupConvolutionBackpropData::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 64, 64}), + ::testing::Values(ov::Shape{4, 3, 5, 1, 1}), + ::testing::ValuesIn(negative_const_shapes), + ::testing::ValuesIn(types), + ::testing::Values(true), // Negative test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - const std::vector negative_const_shapes_kernel_same_as_input{ - {7}, - {1, 7}, - {1, 1, 7}, - {12, 1, 7}, - {1, 1, 1, 7}, - {1, 12, 1, 7}, - {7, 1}, - {1, 7, 1}, - {12, 7, 1}, - {1, 1, 7, 1}, - {1, 12, 7, 1}, - {1, 1, 7, 7}, - {1, 12, 7, 7}, - }; +const std::vector negative_const_shapes_kernel_same_as_input{ + {7}, + {1, 7}, + {1, 1, 7}, + {12, 1, 7}, + {1, 1, 1, 7}, + {1, 12, 1, 7}, + {7, 1}, + {1, 7, 1}, + {12, 7, 1}, + {1, 1, 7, 1}, + {1, 12, 7, 1}, + {1, 1, 7, 7}, + {1, 12, 7, 7}, +}; - INSTANTIATE_TEST_SUITE_P(smoke_NegativeConvolutionBackpropData_2D_kernel_same_as_input, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::ConvolutionBackpropData::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 7, 7}), - ::testing::Values(ngraph::Shape{12, 20, 7, 7}), - ::testing::ValuesIn(negative_const_shapes_kernel_same_as_input), - ::testing::ValuesIn(types), - ::testing::Values(true), // Negative test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_NegativeConvolutionBackpropData_2D_kernel_same_as_input, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::ConvolutionBackpropData::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 7, 7}), + ::testing::Values(ov::Shape{12, 20, 7, 7}), + ::testing::ValuesIn(negative_const_shapes_kernel_same_as_input), + ::testing::ValuesIn(types), + ::testing::Values(true), // Negative test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_NegativeGroupConvolutionBackpropData_2D_kernel_same_as_input, MulConvFusion, - ::testing::Combine( - ::testing::Values(ngraph::opset8::GroupConvolutionBackpropData::get_type_info_static()), - ::testing::Values(ngraph::Shape{2, 12, 7, 7}), - ::testing::Values(ngraph::Shape{4, 3, 5, 7, 7}), - ::testing::ValuesIn(negative_const_shapes_kernel_same_as_input), - ::testing::ValuesIn(types), - ::testing::Values(true), // Negative test - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MulConvFusion::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_NegativeGroupConvolutionBackpropData_2D_kernel_same_as_input, + MulConvFusion, + ::testing::Combine(::testing::Values(ov::op::v1::GroupConvolutionBackpropData::get_type_info_static()), + ::testing::Values(ov::Shape{2, 12, 7, 7}), + ::testing::Values(ov::Shape{4, 3, 5, 7, 7}), + ::testing::ValuesIn(negative_const_shapes_kernel_same_as_input), + ::testing::ValuesIn(types), + ::testing::Values(true), // Negative test + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MulConvFusion::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp index f28990117a39d3..c46e92708949b0 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp @@ -2,31 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "subgraph_tests/multiply_add.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32 -}; +const std::vector input_type = {ov::element::f32}; -const std::vector> inputShapes = { - {1, 3, 2, 2, 4, 5}, - {1, 3, 2, 2, 2, 4, 5}, - {1, 3, 2, 2, 2, 2, 4, 5}, - {1, 3, 2, 2, 2, 2, 2, 4, 5}, - {1, 3, 2, 2, 2, 2, 2, 2, 4, 5}, +const std::vector inputShapes = { + {1, 3, 2, 2, 4, 5}, + {1, 3, 2, 2, 2, 4, 5}, + {1, 3, 2, 2, 2, 2, 4, 5}, + {1, 3, 2, 2, 2, 2, 2, 4, 5}, + {1, 3, 2, 2, 2, 2, 2, 2, 4, 5}, }; -INSTANTIATE_TEST_SUITE_P(smoke_MultipleAdd_Nd, MultiplyAddLayerTest, - ::testing::Combine( - ::testing::ValuesIn(inputShapes), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MultiplyAddLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_MultipleAdd_Nd, + MultiplyAddLayerTest, + ::testing::Combine(::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(input_type), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MultiplyAddLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mvn_multiply_add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mvn_multiply_add.cpp index 4fe0990bfba66a..f27050440a7cf9 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mvn_multiply_add.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/mvn_multiply_add.cpp @@ -4,91 +4,75 @@ #include "subgraph_tests/mvn_multiply_add.hpp" -using namespace SubgraphTestsDefinitions; -using namespace InferenceEngine; +using namespace ov::test; namespace { -const std::vector netPrecision = { - Precision::FP32 -}; +const std::vector netPrecision = {ov::element::f32}; -std::vector idxPrecision = { - Precision::I64 -}; +std::vector idxPrecision = {ov::element::i64}; -const std::vector acrossChannels = { - true, - false -}; +const std::vector acrossChannels = {true, false}; -const std::vector normalizeVariance = { - true, - false -}; +const std::vector normalizeVariance = {true, false}; -const std::vector epsilon = { - 0.000000001f -}; +const std::vector epsilon = {0.000000001f}; -const std::vector epsMode = { - "inside_sqrt", - "outside_sqrt" -}; +const std::vector epsMode = {"inside_sqrt", "outside_sqrt"}; -const std::vector> shapes_1D = { - std::pair{ SizeVector{5}, SizeVector{5}}, - std::pair{ SizeVector{64}, SizeVector{64}}, +const std::vector> shapes_1D = { + std::pair{ov::Shape{5}, ov::Shape{5}}, + std::pair{ov::Shape{64}, ov::Shape{64}}, }; -const std::vector> shapes_2D = { - std::pair{ SizeVector{1, 5}, SizeVector{1, 5}}, - std::pair{ SizeVector{2, 17}, SizeVector{1, 17}}, - std::pair{ SizeVector{9, 64}, SizeVector{1, 64}}, - std::pair{ SizeVector{5, 15}, SizeVector{1, 15}}, +const std::vector> shapes_2D = { + std::pair{ov::Shape{1, 5}, ov::Shape{1, 5}}, + std::pair{ov::Shape{2, 17}, ov::Shape{1, 17}}, + std::pair{ov::Shape{9, 64}, ov::Shape{1, 64}}, + std::pair{ov::Shape{5, 15}, ov::Shape{1, 15}}, }; -const std::vector> shapes_3D = { - std::pair{ SizeVector{1, 5, 8}, SizeVector{1, 5, 8}}, - std::pair{ SizeVector{2, 17, 9}, SizeVector{1, 1, 9}}, - std::pair{ SizeVector{1, 1, 10}, SizeVector{1, 1, 10}}, - std::pair{ SizeVector{2, 3, 3}, SizeVector{2, 3, 3}}, +const std::vector> shapes_3D = { + std::pair{ov::Shape{1, 5, 8}, ov::Shape{1, 5, 8}}, + std::pair{ov::Shape{2, 17, 9}, ov::Shape{1, 1, 9}}, + std::pair{ov::Shape{1, 1, 10}, ov::Shape{1, 1, 10}}, + std::pair{ov::Shape{2, 3, 3}, ov::Shape{2, 3, 3}}, }; -INSTANTIATE_TEST_SUITE_P(smoke_MVNMultiplyAdd_1D, MVNMultiplyAdd, - ::testing::Combine( - ::testing::ValuesIn(shapes_1D), - ::testing::ValuesIn(netPrecision), - ::testing::ValuesIn(idxPrecision), - ::testing::Values(std::vector{0}), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon), - ::testing::ValuesIn(epsMode), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MVNMultiplyAdd::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_MVNMultiplyAdd_2D, MVNMultiplyAdd, - ::testing::Combine( - ::testing::ValuesIn(shapes_2D), - ::testing::ValuesIn(netPrecision), - ::testing::ValuesIn(idxPrecision), - ::testing::Values(std::vector{1}), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon), - ::testing::ValuesIn(epsMode), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MVNMultiplyAdd::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_MVNMultiplyAdd_3D, MVNMultiplyAdd, - ::testing::Combine( - ::testing::ValuesIn(shapes_3D), - ::testing::ValuesIn(netPrecision), - ::testing::ValuesIn(idxPrecision), - ::testing::Values(std::vector{2}), - ::testing::ValuesIn(normalizeVariance), - ::testing::ValuesIn(epsilon), - ::testing::ValuesIn(epsMode), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - MVNMultiplyAdd::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_MVNMultiplyAdd_1D, + MVNMultiplyAdd, + ::testing::Combine(::testing::ValuesIn(shapes_1D), + ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(idxPrecision), + ::testing::Values(std::vector{0}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon), + ::testing::ValuesIn(epsMode), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MVNMultiplyAdd::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MVNMultiplyAdd_2D, + MVNMultiplyAdd, + ::testing::Combine(::testing::ValuesIn(shapes_2D), + ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(idxPrecision), + ::testing::Values(std::vector{1}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon), + ::testing::ValuesIn(epsMode), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MVNMultiplyAdd::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MVNMultiplyAdd_3D, + MVNMultiplyAdd, + ::testing::Combine(::testing::ValuesIn(shapes_3D), + ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(idxPrecision), + ::testing::Values(std::vector{2}), + ::testing::ValuesIn(normalizeVariance), + ::testing::ValuesIn(epsilon), + ::testing::ValuesIn(epsMode), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + MVNMultiplyAdd::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/parameter_result.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/parameter_result.cpp index 7f25b2ef54ef44..a70b3c7bbc3659 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/parameter_result.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/parameter_result.cpp @@ -11,12 +11,6 @@ using namespace ov::test; namespace { -INSTANTIATE_TEST_SUITE_P(smoke_Check, - ParameterResultSubgraphTestLegacyApi, - ::testing::Combine(::testing::Values(ov::test::InputShape{{1, 3, 10, 10}, {}}), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ParameterResultSubgraphTestBase::getTestCaseName); - const std::vector inputShapes = { ov::test::InputShape{{1, 3, 10, 10}, {{1, 3, 10, 10}, {1, 3, 10, 10}}}, ov::test::InputShape{{-1, -1, -1, -1}, {{1, 3, 10, 10}, {2, 5, 3, 10}, {1, 3, 10, 10}, {1, 3, 10, 10}}}, diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp index 4fe713ebc44187..8617d1e68c2742 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp @@ -2,42 +2,45 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "subgraph_tests/perm_conv_perm_concat.hpp" -#include "common_test_utils/test_constants.hpp" + +#include + namespace { -std::vector> input_shapes { +std::vector input_shapes{ {1, 1, 7, 32}, {1, 1, 8, 16}, }; -std::vector> kernel_shapes { +std::vector kernel_shapes{ {1, 3}, {1, 5}, }; -std::vector output_channels { +std::vector output_channels{ 32, 64, }; -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, }; -std::map additional_config = { -}; -} // namespace - -namespace SubgraphTestsDefinitions { -INSTANTIATE_TEST_SUITE_P(smoke_basic, PermConvPermConcat, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(input_shapes), - ::testing::ValuesIn(kernel_shapes), - ::testing::ValuesIn(output_channels), - ::testing::Values(additional_config)), - PermConvPermConcat::getTestCaseName); -} // namespace SubgraphTestsDefinitions +ov::AnyMap additional_config = {}; +} // namespace + +namespace ov { +namespace test { + +INSTANTIATE_TEST_SUITE_P(smoke_basic, + PermConvPermConcat, + ::testing::Combine(::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(kernel_shapes), + ::testing::ValuesIn(output_channels), + ::testing::Values(additional_config)), + PermConvPermConcat::getTestCaseName); +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/range_add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/range_add.cpp index 409ba17f27e68f..5b19f8b34d7f81 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/range_add.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/range_add.cpp @@ -2,97 +2,76 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "subgraph_tests/range_add.hpp" -#include "common_test_utils/test_constants.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { -const std::vector positiveStart = { 1.0f, 1.2f }; -const std::vector positiveStop = { 5.0f, 5.2f }; -const std::vector positiveStep = { 1.0f, 0.1f }; +const std::vector positiveStart = {1.0f, 1.2f}; +const std::vector positiveStop = {5.0f, 5.2f}; +const std::vector positiveStep = {1.0f, 0.1f}; -const std::vector negativeStart = { 1.0f, 1.2f }; -const std::vector negativeStop = { -5.0f, -5.2f }; -const std::vector negativeStep = { -1.0f, -0.1f }; +const std::vector negativeStart = {1.0f, 1.2f}; +const std::vector negativeStop = {-5.0f, -5.2f}; +const std::vector negativeStep = {-1.0f, -0.1f}; -const std::vector trunc_start = { 1.2f, 1.9f }; -const std::vector trunc_stop = { 11.4f, 11.8f }; -const std::vector trunc_step = { 1.3f, 2.8f }; +const std::vector trunc_start = {1.2f, 1.9f}; +const std::vector trunc_stop = {11.4f, 11.8f}; +const std::vector trunc_step = {1.3f, 2.8f}; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 // "[NOT_IMPLEMENTED] Input image format FP16 is not supported yet... +const std::vector element_types = { + ov::element::f32, + ov::element::f16 // "[NOT_IMPLEMENTED] Input image format FP16 is not supported yet... }; // ------------------------------ V0 ------------------------------ -INSTANTIATE_TEST_SUITE_P(smoke_BasicPositive, RangeAddSubgraphTest, - ::testing::Combine( - ::testing::ValuesIn(positiveStart), - ::testing::ValuesIn(positiveStop), - ::testing::ValuesIn(positiveStep), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - RangeAddSubgraphTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BasicPositive, + RangeAddSubgraphTest, + ::testing::Combine(::testing::ValuesIn(positiveStart), + ::testing::ValuesIn(positiveStop), + ::testing::ValuesIn(positiveStep), + ::testing::ValuesIn(element_types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RangeAddSubgraphTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_BasicNegative, RangeAddSubgraphTest, - ::testing::Combine( - ::testing::ValuesIn(negativeStart), - ::testing::ValuesIn(negativeStop), - ::testing::ValuesIn(negativeStep), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - RangeAddSubgraphTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BasicNegative, + RangeAddSubgraphTest, + ::testing::Combine(::testing::ValuesIn(negativeStart), + ::testing::ValuesIn(negativeStop), + ::testing::ValuesIn(negativeStep), + ::testing::ValuesIn(element_types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RangeAddSubgraphTest::getTestCaseName); // ------------------------------ V4 ------------------------------ -INSTANTIATE_TEST_SUITE_P(smoke_BasicPositive, RangeNumpyAddSubgraphTest, - ::testing::Combine( - ::testing::ValuesIn(positiveStart), - ::testing::ValuesIn(positiveStop), - ::testing::ValuesIn(positiveStep), - ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - RangeNumpyAddSubgraphTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BasicPositive, + RangeNumpyAddSubgraphTest, + ::testing::Combine(::testing::ValuesIn(positiveStart), + ::testing::ValuesIn(positiveStop), + ::testing::ValuesIn(positiveStep), + ::testing::ValuesIn(element_types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RangeNumpyAddSubgraphTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_BasicNegative, RangeNumpyAddSubgraphTest, - ::testing::Combine( - ::testing::ValuesIn(negativeStart), - ::testing::ValuesIn(negativeStop), - ::testing::ValuesIn(negativeStep), - ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - RangeNumpyAddSubgraphTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BasicNegative, + RangeNumpyAddSubgraphTest, + ::testing::Combine(::testing::ValuesIn(negativeStart), + ::testing::ValuesIn(negativeStop), + ::testing::ValuesIn(negativeStep), + ::testing::ValuesIn(element_types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RangeNumpyAddSubgraphTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_BasicTruncateInputs, RangeNumpyAddSubgraphTest, - ::testing::Combine( - ::testing::ValuesIn(trunc_start), - ::testing::ValuesIn(trunc_stop), - ::testing::ValuesIn(trunc_step), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::I32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - RangeNumpyAddSubgraphTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BasicTruncateInputs, + RangeNumpyAddSubgraphTest, + ::testing::Combine(::testing::ValuesIn(trunc_start), + ::testing::ValuesIn(trunc_stop), + ::testing::ValuesIn(trunc_step), + ::testing::ValuesIn(element_types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RangeNumpyAddSubgraphTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/relu_shape_of.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/relu_shape_of.cpp index 18ad81aecc25d2..00559d09144d2d 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/relu_shape_of.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/relu_shape_of.cpp @@ -2,22 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "subgraph_tests/relu_shape_of.hpp" -#include "common_test_utils/test_constants.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { - const std::vector netPrecisions = { - InferenceEngine::Precision::I32 - }; +const std::vector input_types = {ov::element::i32}; - INSTANTIATE_TEST_SUITE_P(smoke_Check, ReluShapeOfSubgraphTest, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(std::vector({20, 10, 10, 10})), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ReluShapeOfSubgraphTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Check, + ReluShapeOfSubgraphTest, + ::testing::Combine(::testing::ValuesIn(input_types), + ::testing::Values(ov::element::i64), + ::testing::Values(ov::Shape{20, 10, 10, 10}), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ReluShapeOfSubgraphTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp index 79743431999170..fa82dc5baa7f1e 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp @@ -2,47 +2,46 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "subgraph_tests/reshape_squeeze_reshape_relu.hpp" -#include "common_test_utils/test_constants.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { - std::vector inputs{ - {{1, 1, 3}, {0, 1}}, - {{1, 1, 3}, {0}}, - {{1, 1, 3}, {1}}, - {{1, 3, 1}, {0, 2}}, - {{1, 3, 1}, {0}}, - {{1, 3, 1}, {2}}, - {{3, 1, 1}, {1, 2}}, - {{3, 1, 1}, {1}}, - {{3, 1, 1}, {2}}, - {{4, 1, 3, 1}, {1, 3}}, - {{4, 1, 1, 3}, {1, 2}}, - {{1, 4, 1, 3}, {0, 2}}, - {{1, 3, 5, 2, 1}, {0, 4}}, - {{3, 1, 2, 4, 4, 3}, {1}}, - {{1, 1, 1, 1, 1, 3}, {0, 1, 2, 3, 4}}, - {{1, 1, 1, 1, 1, 3}, {1, 3}}, - {{1}, {0}}, - }; +std::vector inputs{ + {{1, 1, 3}, {0, 1}}, + {{1, 1, 3}, {0}}, + {{1, 1, 3}, {1}}, + {{1, 3, 1}, {0, 2}}, + {{1, 3, 1}, {0}}, + {{1, 3, 1}, {2}}, + {{3, 1, 1}, {1, 2}}, + {{3, 1, 1}, {1}}, + {{3, 1, 1}, {2}}, + {{4, 1, 3, 1}, {1, 3}}, + {{4, 1, 1, 3}, {1, 2}}, + {{1, 4, 1, 3}, {0, 2}}, + {{1, 3, 5, 2, 1}, {0, 4}}, + {{3, 1, 2, 4, 4, 3}, {1}}, + {{1, 1, 1, 1, 1, 3}, {0, 1, 2, 3, 4}}, + {{1, 1, 1, 1, 1, 3}, {1, 3}}, + {{1}, {0}}, +}; - std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - }; +std::vector input_types = { + ov::element::f32, + ov::element::f16, +}; - const std::vector opTypes = { - ngraph::helpers::SqueezeOpType::SQUEEZE, - ngraph::helpers::SqueezeOpType::UNSQUEEZE - }; +const std::vector opTypes = {ov::test::utils::SqueezeOpType::SQUEEZE, + ov::test::utils::SqueezeOpType::UNSQUEEZE}; - INSTANTIATE_TEST_SUITE_P(smoke_reshape_squeeze_reshape_relu, ReshapeSqueezeReshapeRelu, - ::testing::Combine( - ::testing::ValuesIn(inputs), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(opTypes)), - ReshapeSqueezeReshapeRelu::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_reshape_squeeze_reshape_relu, + ReshapeSqueezeReshapeRelu, + ::testing::Combine(::testing::ValuesIn(inputs), + ::testing::ValuesIn(input_types), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(opTypes)), + ReshapeSqueezeReshapeRelu::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp index 8e347ad75c31d4..1d60e68187709e 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp @@ -2,28 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "subgraph_tests/split_conv_concat.hpp" -#include "common_test_utils/test_constants.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 -}; +const std::vector input_types = {ov::element::f32, ov::element::f16}; -INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, SplitConvConcat, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::SizeVector({1, 6, 40, 40})), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - SplitConvConcat::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, + SplitConvConcat, + ::testing::Combine(::testing::ValuesIn(input_types), + ::testing::Values(ov::Shape{1, 6, 40, 40}), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + SplitConvConcat::getTestCaseName); } // namespace - - - - diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/variadic_split_pad.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/variadic_split_pad.cpp index 7386a46ef3868a..27d63ad27ff852 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/variadic_split_pad.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/variadic_split_pad.cpp @@ -4,60 +4,52 @@ #include "subgraph_tests/variadic_split_pad.hpp" -using namespace SubgraphTestsDefinitions; +using namespace ov::test; namespace { -const std::vector netPrecision = { - InferenceEngine::Precision::FP32 -}; +const std::vector input_type = {ov::element::f32}; -const std::vector shapes = { - {1, 8, 3, 2}, - {3, 8, 8, 8}, +const std::vector shapes = { + {1, 8, 3, 2}, + {3, 8, 8, 8}, }; const std::vector> connectedIndexes = { - {0}, - {0, 2}, - {0, 1, 3}, - {0, 1, 1, 0}, - {0, 0, 0, 1}, + {0}, + {0, 2}, + {0, 1, 3}, + {0, 1, 1, 0}, + {0, 0, 0, 1}, }; -const std::vector> numSplits = { - {2, 2, 2, 2}, - {1, 2, 4, 1}, - {3, 2, 2, 1} -}; +const std::vector> numSplits = {{2, 2, 2, 2}, {1, 2, 4, 1}, {3, 2, 2, 1}}; const std::vector> padsBegin = { - {0, 0, 0, 0}, - {0, 0, 1, 1}, + {0, 0, 0, 0}, + {0, 0, 1, 1}, }; const std::vector> padsEnd = { - {0, 0, 0, 0}, - {0, 0, 1, 1}, -}; - -const std::vector padMode = { - ngraph::helpers::PadMode::CONSTANT, - ngraph::helpers::PadMode::EDGE, - ngraph::helpers::PadMode::REFLECT, - ngraph::helpers::PadMode::SYMMETRIC + {0, 0, 0, 0}, + {0, 0, 1, 1}, }; -INSTANTIATE_TEST_SUITE_P(smoke_CPU, VariadicSplitPad, - ::testing::Combine( - ::testing::ValuesIn(shapes), - ::testing::Values(1), - ::testing::ValuesIn(numSplits), - ::testing::ValuesIn(connectedIndexes), - ::testing::ValuesIn(padsBegin), - ::testing::ValuesIn(padsEnd), - ::testing::ValuesIn(padMode), - ::testing::ValuesIn(netPrecision), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - VariadicSplitPad::getTestCaseName); +const std::vector padMode = {ov::op::PadMode::CONSTANT, + ov::op::PadMode::EDGE, + ov::op::PadMode::REFLECT, + ov::op::PadMode::SYMMETRIC}; + +INSTANTIATE_TEST_SUITE_P(smoke_CPU, + VariadicSplitPad, + ::testing::Combine(::testing::ValuesIn(shapes), + ::testing::Values(1), + ::testing::ValuesIn(numSplits), + ::testing::ValuesIn(connectedIndexes), + ::testing::ValuesIn(padsBegin), + ::testing::ValuesIn(padsEnd), + ::testing::ValuesIn(padMode), + ::testing::ValuesIn(input_type), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + VariadicSplitPad::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/pooling.cpp new file mode 100644 index 00000000000000..e40771146cd0ec --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/pooling.cpp @@ -0,0 +1,464 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "pooling.hpp" +#include "test_utils/cpu_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +std::string PoolingLayerCPUTest::getTestCaseName(const testing::TestParamInfo& obj) { + LayerTestsDefinitions::poolSpecificParams basicParamsSet; + InputShape inputShapes; + ElementType inPrc; + bool isInt8; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, inputShapes, inPrc, isInt8, cpuParams, fusingParams) = obj.param; + + ngraph::helpers::PoolingTypes poolType; + std::vector kernel, stride; + std::vector padBegin, padEnd; + ngraph::op::PadType padType; + ngraph::op::RoundingType roundingType; + bool excludePad; + std::tie(poolType, kernel, stride, padBegin, padEnd, roundingType, padType, excludePad) = basicParamsSet; + + std::ostringstream results; + results << "IS=("; + results << ov::test::utils::partialShape2str({inputShapes.first}) << ")_"; + results << "TS="; + for (const auto& shape : inputShapes.second) { + results << ov::test::utils::vec2str(shape) << "_"; + } + results << "Prc=" << inPrc << "_"; + switch (poolType) { + case ngraph::helpers::PoolingTypes::MAX: + results << "MaxPool_"; + break; + case ngraph::helpers::PoolingTypes::AVG: + results << "AvgPool_"; + results << "ExcludePad=" << excludePad << "_"; + break; + } + results << "K" << ov::test::utils::vec2str(kernel) << "_"; + results << "S" << ov::test::utils::vec2str(stride) << "_"; + results << "PB" << ov::test::utils::vec2str(padBegin) << "_"; + results << "PE" << ov::test::utils::vec2str(padEnd) << "_"; + results << "Rounding=" << roundingType << "_"; + results << "AutoPad=" << padType << "_"; + results << "INT8=" << isInt8 << "_"; + + results << CPUTestsBase::getTestCaseName(cpuParams); + results << CpuTestWithFusing::getTestCaseName(fusingParams); + return results.str(); +} + +void PoolingLayerCPUTest::SetUp() { + targetDevice = ov::test::utils::DEVICE_CPU; + + LayerTestsDefinitions::poolSpecificParams basicParamsSet; + InputShape inputShapes; + ElementType inPrc; + bool isInt8; + CPUSpecificParams cpuParams; + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, inputShapes, inPrc, isInt8, cpuParams, fusingParams) = this->GetParam(); + + ngraph::helpers::PoolingTypes poolType; + std::vector kernel, stride; + std::vector padBegin, padEnd; + ngraph::op::PadType padType; + ngraph::op::RoundingType roundingType; + bool excludePad; + std::tie(poolType, kernel, stride, padBegin, padEnd, roundingType, padType, excludePad) = basicParamsSet; + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + if (selectedType.empty()) { + selectedType = getPrimitiveType(); + } + if (isInt8) + selectedType = selectedType + "_I8"; + else + selectedType = makeSelectedTypeStr(selectedType, inPrc); + + init_input_shapes({inputShapes}); + + ov::ParameterVector params; + for (auto&& shape : inputDynamicShapes) { + params.push_back(std::make_shared(inPrc, shape)); + } + + std::shared_ptr poolInput = params[0]; + if (isInt8) { + ov::Shape newShape(poolInput->get_output_partial_shape(0).size(), 1); + poolInput = ngraph::builder::makeFakeQuantize(poolInput, inPrc, 256, newShape); + } + + std::shared_ptr pooling = ngraph::builder::makePooling(poolInput, + stride, + padBegin, + padEnd, + kernel, + roundingType, + padType, + excludePad, + poolType); + + function = makeNgraphFunction(inPrc, params, pooling, "PoolingCPU"); +} + +std::string MaxPoolingV8LayerCPUTest::getTestCaseName( + const testing::TestParamInfo& obj) { + LayerTestsDefinitions::maxPoolV8SpecificParams basicParamsSet; + InputShape inputShapes; + ElementType inPrc; + CPUSpecificParams cpuParams; + std::tie(basicParamsSet, inputShapes, inPrc, cpuParams) = obj.param; + + std::vector kernel, stride, dilation; + std::vector padBegin, padEnd; + ngraph::op::PadType padType; + ngraph::op::RoundingType roundingType; + ngraph::element::Type indexElementType; + int64_t axis; + std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = + basicParamsSet; + + std::ostringstream results; + results << "IS=("; + results << ov::test::utils::partialShape2str({inputShapes.first}) << ")_"; + results << "TS="; + for (const auto& shape : inputShapes.second) { + results << ov::test::utils::vec2str(shape) << "_"; + } + results << "Prc=" << inPrc << "_"; + results << "MaxPool_"; + results << "K" << ov::test::utils::vec2str(kernel) << "_"; + results << "S" << ov::test::utils::vec2str(stride) << "_"; + results << "D" << ov::test::utils::vec2str(dilation) << "_"; + results << "PB" << ov::test::utils::vec2str(padBegin) << "_"; + results << "PE" << ov::test::utils::vec2str(padEnd) << "_"; + results << "Rounding=" << roundingType << "_"; + results << "AutoPad=" << padType << "_"; + + results << CPUTestsBase::getTestCaseName(cpuParams); + return results.str(); +} + +void MaxPoolingV8LayerCPUTest::SetUp() { + targetDevice = ov::test::utils::DEVICE_CPU; + + LayerTestsDefinitions::maxPoolV8SpecificParams basicParamsSet; + InputShape inputShapes; + ElementType inPrc; + CPUSpecificParams cpuParams; + std::tie(basicParamsSet, inputShapes, inPrc, cpuParams) = this->GetParam(); + + std::vector kernel, stride, dilation; + std::vector padBegin, padEnd; + ngraph::op::PadType padType; + ngraph::op::RoundingType roundingType; + ngraph::element::Type indexElementType; + int64_t axis; + std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = + basicParamsSet; + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + if (selectedType.empty()) { + selectedType = getPrimitiveType(); + } + selectedType = makeSelectedTypeStr(selectedType, inPrc); + + init_input_shapes({inputShapes}); + + ov::ParameterVector params; + for (auto&& shape : inputDynamicShapes) { + params.push_back(std::make_shared(inPrc, shape)); + } + std::shared_ptr pooling = ngraph::builder::makeMaxPoolingV8(params[0], + stride, + dilation, + padBegin, + padEnd, + kernel, + roundingType, + padType, + indexElementType, + axis); + pooling->get_rt_info() = getCPUInfo(); + ngraph::ResultVector results{std::make_shared(pooling->output(0))}; + function = std::make_shared(results, params, "MaxPooling"); +} + +TEST_P(PoolingLayerCPUTest, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "Pooling"); +} + +TEST_P(MaxPoolingV8LayerCPUTest, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "Pooling"); +} + +namespace Pooling { + +// The combination of parameters: NCHW + CEIL gives an accuracy problem in ACL AvgPool +const ngraph::op::RoundingType expectedAvgRoundingType() { +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + return ngraph::op::RoundingType::FLOOR; +#else + return ngraph::op::RoundingType::CEIL; +#endif +} + +const std::vector& paramsMax3D() { + static const std::vector paramsMax3D = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {2}, {0}, {0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4}, {2}, {0}, {0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {1}, {0}, {0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + }; + return paramsMax3D; +} + +const std::vector& paramsAvg3D() { + static const std::vector paramsAvg3D = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_UPPER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4}, {4}, {2}, {2}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + }; + return paramsAvg3D; +} + +const std::vector& inpOutPrecision() { + static const std::vector inpOutPrecision = {ElementType::f32/*, ElementType::bf16*/}; + return inpOutPrecision; +} + +const std::vector& paramsMax4D() { + static const std::vector paramsMax4D = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4, 2}, {2, 2}, {0, 0}, {0, 0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4, 2}, {2, 1}, {0, 0}, {0, 0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + }; + return paramsMax4D; +} + +const std::vector& paramsMaxV84D() { + static const std::vector paramsMaxV84D = { + LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {1, 1}, {0, 0}, {0, 0}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER }, + }; + return paramsMaxV84D; +} + +const std::vector& inputShapes3D() { + static const std::vector inputShapes3D = { + { {}, {{3, 4, 64}} }, + { {}, {{2, 8, 12}} }, + { {}, {{1, 16, 12}} }, + { {}, {{1, 21, 4}} }, + { {}, {{1, 32, 8}} }, + { + // dynamic + {-1, -1, -1}, + // target + { + {1, 32, 8}, + {1, 21, 4}, + {2, 8, 12} + } + }, + { + // dynamic + {{1, 5}, {4, 32}, {1, 64}}, + // target + { + {3, 4, 64}, + {1, 16, 12}, + {1, 32, 8} + } + } + }; + return inputShapes3D; +} + +const std::vector& inputShapes4D() { + static const std::vector inputShapes4D = { + { {}, {{3, 4, 64, 64}} }, + { {}, {{2, 8, 8, 12}} }, + { {}, {{1, 16, 16, 12}} }, + { {}, {{1, 21, 8, 4}} }, + { {}, {{1, 32, 8, 8}} }, + { + // dynamic + {-1, -1, -1, -1}, + // target + { + {1, 32, 8, 8}, + {1, 21, 8, 4}, + {2, 8, 8, 12}, + {1, 96, 125, 125} + } + }, + { + // dynamic + {{1, 5}, {4, 32}, {1, 64}, {1, 64}}, + // target + { + {3, 4, 64, 64}, + {1, 16, 16, 12}, + {1, 32, 8, 8} + } + }, + { + // dynamic + {{1, 10}, 16, 8, 8}, + // target + { + {1, 16, 8, 8}, + {2, 16, 8, 8}, + } + } + }; + return inputShapes4D; +} + +const std::vector& inputShapes5D() { + static const std::vector inputShapes5D = { + { {}, {{1, 4, 16, 16, 16}} }, + { {}, {{2, 8, 8, 8, 8}} }, + { {}, {{2, 16, 12, 16, 20}} }, + { {}, {{1, 19, 16, 20, 8}} }, + { {}, {{1, 32, 16, 8, 12}} }, + { + // dynamic + {-1, -1, -1, -1, -1}, + // target + { + {2, 8, 8, 8, 8}, + {1, 19, 16, 20, 8}, + {1, 4, 16, 16, 16} + } + }, + { + // dynamic + {{1, 5}, {4, 32}, {1, 64}, {1, 64}, {1, 25}}, + // target + { + {1, 4, 16, 16, 16}, + {1, 32, 16, 8, 12}, + {3, 16, 4, 8, 3} + } + } + }; + return inputShapes5D; +} + +const std::vector& paramsMaxV85D() { + static const std::vector paramsMaxV85D = { + LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER }, + }; + return paramsMaxV85D; +} + +const std::vector& paramsAvg4D() { + static const std::vector paramsAvg4D = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_LOWER, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_UPPER, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_LOWER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_UPPER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {0, 0}, {0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4, 4}, {4, 4}, {2, 2}, {2, 2}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + }; + return paramsAvg4D; +} + +const std::vector& paramsAvg5D() { + static const std::vector paramsAvg5D = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_LOWER, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_UPPER, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_LOWER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::SAME_UPPER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3, 3, 3}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4, 4, 4}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, true }, + }; + return paramsAvg5D; +} + +const std::vector& paramsMax5D() { + static const std::vector paramsMax5D = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, + }; + return paramsMax5D; +} + +const std::vector& paramsAvg4D_Large() { + static const std::vector paramsAvg4D_Large = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {65, 65}, {65, 65}, {0, 0}, {0, 0}, + ngraph::op::RoundingType::FLOOR, ngraph::op::PadType::VALID, true }, + }; + return paramsAvg4D_Large; +} + +const std::vector& inputShapes4D_Large() { + static const std::vector inputShapes4D_Large = { + { + // dynamic + {-1, -1, -1, -1}, + // target + { + {1, 16, 65, 65}, + {1, 8, 130, 130}, + {1, 16, 65, 65} + } + }, + }; + return inputShapes4D_Large; +} + + +} // namespace Pooling +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/pooling.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/pooling.hpp new file mode 100644 index 00000000000000..ecf12a0360de1f --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/pooling.hpp @@ -0,0 +1,69 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ov_models/builders.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" +#include "shared_test_classes/single_layer/pooling.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +using namespace ov::test; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using poolLayerCpuTestParamsSet = std::tuple; + +using maxPoolV8LayerCpuTestParamsSet = std::tuple; + +class PoolingLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +class MaxPoolingV8LayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +namespace Pooling { +const std::vector& inpOutPrecision(); +const ngraph::op::RoundingType expectedAvgRoundingType(); + +const std::vector& paramsMax3D(); +const std::vector& paramsAvg3D(); +const std::vector& paramsMax4D(); + +const std::vector& paramsMaxV84D(); +const std::vector& paramsMaxV85D(); + +const std::vector& inputShapes3D(); +const std::vector& inputShapes4D(); +const std::vector& inputShapes4D_Large(); +const std::vector& inputShapes5D(); + +const std::vector& paramsAvg4D(); +const std::vector& paramsAvg4D_Large(); +const std::vector& paramsAvg5D(); +const std::vector& paramsMax5D(); +} // namespace Pooling +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp new file mode 100644 index 00000000000000..2f9706e7d2562e --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp @@ -0,0 +1,265 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" +#include "ov_models/builders.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +std::string RandomUniformLayerTestCPU::getTestCaseName(const testing::TestParamInfo& obj) { + const auto& out_shape = std::get<0>(obj.param); + const auto& min_max = std::get<1>(obj.param); + + std::ostringstream result; + + result << "IS={" << out_shape.size(); + result << "}_OS=" << out_shape; + result << "_Min=" << std::get<0>(min_max); + result << "_Max=" << std::get<1>(min_max); + result << "_ShapePrc=" << std::get<2>(obj.param); + result << "_OutPrc=" << std::get<3>(obj.param); + result << "_GlobalSeed=" << std::get<4>(obj.param); + result << "_OperationalSeed=" << std::get<5>(obj.param); + result << "_ConstIn={" << utils::bool2str(std::get<6>(obj.param)) << "," + << utils::bool2str(std::get<7>(obj.param)) << "," + << utils::bool2str(std::get<8>(obj.param)) << "}"; + + result << CPUTestsBase::getTestCaseName(std::get<9>(obj.param)); + + const auto& config = std::get<10>(obj.param); + if (!config.empty()) { + result << "_PluginConf={"; + for (const auto& conf_item : config) { + result << "_" << conf_item.first << "="; + conf_item.second.print(result); + } + result << "}"; + } + + return result.str(); +} + +void RandomUniformLayerTestCPU::SetUp() { + targetDevice = utils::DEVICE_CPU; + + const auto& params = this->GetParam(); + m_output_shape = std::get<0>(params); + const auto& min_max = std::get<1>(params); + const auto& shape_prc = std::get<2>(params); + const auto& output_prc = std::get<3>(params); + m_global_seed = std::get<4>(params); + m_operational_seed = std::get<5>(params); + const auto& const_in_1 = std::get<6>(params); + const auto& const_in_2 = std::get<7>(params); + const auto& const_in_3 = std::get<8>(params); + const auto& cpu_params = std::get<9>(params); + configuration = std::get<10>(params); + + m_min_val = std::get<0>(min_max); + m_max_val = std::get<1>(min_max); + std::tie(inFmts, outFmts, priority, selectedType) = cpu_params; + +#if defined(OV_CPU_WITH_ACL) + updateSelectedType("ref_any", output_prc, configuration); +#else + if (output_prc == ElementType::i64) { + updateSelectedType(getPrimitiveType(), ElementType::i32, configuration); + } else if (output_prc == ElementType::f64) { + updateSelectedType(getPrimitiveType(), ElementType::f32, configuration); + } else if (output_prc == ElementType::f16) { + if (InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + updateSelectedType(getPrimitiveType(), ElementType::f16, configuration); + } else { + updateSelectedType(getPrimitiveType(), ElementType::f32, configuration); + } + } else if (output_prc == ElementType::bf16) { + if (InferenceEngine::with_cpu_x86_bfloat16()) { + updateSelectedType(getPrimitiveType(), ElementType::bf16, configuration); + } else { + updateSelectedType("ref_any", ElementType::bf16, configuration); + } + } else { + updateSelectedType(getPrimitiveType(), output_prc, configuration); + } +#endif + + std::vector in_shapes; + ov::ParameterVector in_params; + std::vector> inputs; + + if (!const_in_1) { + in_shapes.push_back({{}, {{m_output_shape.size()}}}); + in_params.push_back(std::make_shared(shape_prc, ov::PartialShape{static_cast(m_output_shape.size())})); + in_params.back()->set_friendly_name("shape"); + inputs.push_back(in_params.back()); + } else { + inputs.push_back(ngraph::builder::makeConstant(shape_prc, {m_output_shape.size()}, m_output_shape)); + } + if (!const_in_2) { + in_shapes.push_back({{}, {{1}}}); + in_params.push_back(std::make_shared(output_prc, ov::PartialShape{1})); + in_params.back()->set_friendly_name("minval"); + inputs.push_back(in_params.back()); + } else { + inputs.push_back(ngraph::builder::makeConstant(output_prc, {1}, std::vector{m_min_val})); + } + if (!const_in_3) { + in_shapes.push_back({{}, {{1}}}); + in_params.push_back(std::make_shared(output_prc, ov::PartialShape{1})); + in_params.back()->set_friendly_name("maxval"); + inputs.push_back(in_params.back()); + } else { + inputs.push_back(ngraph::builder::makeConstant(output_prc, {1}, std::vector{m_max_val})); + } + + init_input_shapes(in_shapes); + + const auto rnd_op = std::make_shared(inputs[0], inputs[1], inputs[2], output_prc, m_global_seed, m_operational_seed); + const ov::ResultVector results{std::make_shared(rnd_op)}; + + function = std::make_shared(results, in_params, "RandomUniformLayerTestCPU"); +} + +template +void fill_data(TD* dst, const TS* src, size_t len) { + for (size_t i = 0llu; i < len; i++) { + dst[i] = static_cast(src[i]); + } +} + +void RandomUniformLayerTestCPU::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + const auto& func_inputs = function->inputs(); + + for (size_t i = 0llu; i < func_inputs.size(); ++i) { + const auto& func_input = func_inputs[i]; + const auto& name = func_input.get_node()->get_friendly_name(); + const auto& in_prc = func_input.get_element_type(); + auto tensor = ov::Tensor(in_prc, targetInputStaticShapes[i]); + +#define CASE(P, S, L) \ +case P : \ +fill_data(tensor.data::value_type>(), S, L); break; + + if (name == "shape") { + switch (in_prc) { + CASE(ElementType::i32, m_output_shape.data(), m_output_shape.size()) + CASE(ElementType::i64, m_output_shape.data(), m_output_shape.size()) + default: + OPENVINO_THROW("RandomUniform does not support precision ", in_prc, " for the Shape input."); + } + } else if (name == "minval") { + switch (in_prc) { + CASE(ElementType::f32, &m_min_val, 1) + CASE(ElementType::f16, &m_min_val, 1) + CASE(ElementType::bf16, &m_min_val, 1) + CASE(ElementType::i32, &m_min_val, 1) + CASE(ElementType::i64, &m_min_val, 1) + CASE(ElementType::f64, &m_min_val, 1) + default: + OPENVINO_THROW("RandomUniform does not support precision ", in_prc, " for the Minval input."); + } + } else if (name == "maxval") { + switch (in_prc) { + CASE(ElementType::f32, &m_max_val, 1) + CASE(ElementType::f16, &m_max_val, 1) + CASE(ElementType::bf16, &m_max_val, 1) + CASE(ElementType::i32, &m_max_val, 1) + CASE(ElementType::i64, &m_max_val, 1) + CASE(ElementType::f64, &m_max_val, 1) + default: + OPENVINO_THROW("RandomUniform does not support precision ", in_prc, " for the Maxval input."); + } + } + +#undef CASE + + inputs.insert({func_input.get_node_shared_ptr(), tensor}); + } +} + +void RandomUniformLayerTestCPU::compare(const std::vector& expected, const std::vector& actual) { + if (m_global_seed != 0lu || m_operational_seed != 0lu) { + SubgraphBaseTest::compare(expected, actual); + return; + } + + // When both seed values are equal to zero, RandomUniform should generate non-deterministic sequence. + // In this case will use Mean and Variance metrics. + +#define CASE(X) case X : rndUCompare::value_type>(expected[0], actual[0]); break; + + switch (expected[0].get_element_type()) { + CASE(ElementType::f32) + CASE(ElementType::i32) + CASE(ElementType::f16) + CASE(ElementType::bf16) + CASE(ElementType::i64) + CASE(ElementType::f64) + default: OPENVINO_THROW("Unsupported element type: ", expected[0].get_element_type()); + } + +#undef CASE +} + +precisions_map RandomUniformLayerTestCPU::get_ref_precisions_convert_map() { + precisions_map precisions; + + if (!InferenceEngine::with_cpu_x86_avx512_core()) { + precisions.insert({ ov::element::bf16, ov::element::f32 }); + } + if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + precisions.insert({ ov::element::f16, ov::element::f32 }); + } + + return precisions; +} + +inline double less_or_equal(double a, double b) { + return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * std::numeric_limits::epsilon()); +} + +template +void RandomUniformLayerTestCPU::rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual) { + auto actual_data = actual.data(); + size_t shape_size_cnt = ov::shape_size(expected.get_shape()); + double act_mean = 0.0; + double act_variance = 0.0; + const double exp_mean = (m_max_val + m_min_val) / 2.0; + const double exp_variance = std::pow(m_max_val - m_min_val, 2) / 12.0; + + for (size_t i = 0; i < shape_size_cnt; ++i) { + auto actual_value = static_cast(actual_data[i]); + if (std::isnan(actual_value)) { + std::ostringstream out_stream; + out_stream << "Actual value is NAN on coordinate: " << i; + throw std::runtime_error(out_stream.str()); + } + act_mean += actual_value; + act_variance += std::pow(actual_value - exp_mean, 2); + } + act_mean /= shape_size_cnt; + act_variance /= shape_size_cnt; + + auto rel_mean = (exp_mean - act_mean) / (m_max_val - m_min_val); + auto rel_variance = (exp_variance - act_variance) / std::pow(m_max_val - m_min_val, 2); + + if (!(less_or_equal(rel_mean, m_mean_threshold) && less_or_equal(rel_variance, m_variance_threshold))) { + std::ostringstream out_stream; + out_stream << "rel_mean < m_mean_threshold && rel_variance < m_variance_threshold" << + "\n\t rel_mean: " << rel_mean << + "\n\t rel_variance: " << rel_variance; + throw std::runtime_error(out_stream.str()); + } +} + +TEST_P(RandomUniformLayerTestCPU, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "RandomUniform"); +} + +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp new file mode 100644 index 00000000000000..1cb9f5fccc451a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp @@ -0,0 +1,53 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "test_utils/cpu_test_utils.hpp" + +namespace CPULayerTestsDefinitions { + +typedef std::tuple< + ov::Shape, // Output shapes + std::tuple, // Min and Max values + ov::test::ElementType, // Shape precision + ov::test::ElementType, // Output precision + uint64_t, // Global seed + uint64_t, // Operational seed + bool, // Is 1st input constant + bool, // Is 2nd input constant + bool, // Is 3rd input constant + CPUTestUtils::CPUSpecificParams, // CPU specific params + ov::AnyMap // Additional plugin configuration +> RandomUniformLayerTestCPUParamSet; + +class RandomUniformLayerTestCPU : public testing::WithParamInterface, + public ov::test::SubgraphBaseTest, public CPUTestUtils::CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; + + void generate_inputs(const std::vector& target_shapes) override; + + void compare(const std::vector& expected, const std::vector& actual) override; + + precisions_map get_ref_precisions_convert_map() override; + + template + void rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual); + +private: + ov::Shape m_output_shape; + uint64_t m_global_seed; + uint64_t m_operational_seed; + double m_min_val; + double m_max_val; + static constexpr double m_mean_threshold = 0.05; + static constexpr double m_variance_threshold = 0.1; +}; + +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/pooling.cpp new file mode 100644 index 00000000000000..e15408a6085b9d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/pooling.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/pooling.hpp" +#include "shared_test_classes/single_layer/pooling.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace Pooling { + +static CPUSpecificParams expectedCpuConfig() { +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + return CPUSpecificParams{{}, {}, {"acl"}, "acl"}; +#else + return CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"}; +#endif +} +const std::vector vecCpuConfigs = {expectedCpuConfig()}; + +const std::vector paramsAvg3D_RefOnly = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2}, {2}, {2}, {2}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, false }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_3D, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMax3D()), + ::testing::ValuesIn(inputShapes3D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg3D()), + ::testing::ValuesIn(inputShapes3D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D_NotOptimized, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg3D_RefOnly), + ::testing::ValuesIn(inputShapes3D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::Values(expectedCpuConfig()), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +const std::vector paramsAvg4D_RefOnly = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, false }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_4D, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMax4D()), + ::testing::ValuesIn(inputShapes4D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_4D, MaxPoolingV8LayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMaxV84D()), + ::testing::ValuesIn(inputShapes4D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::ValuesIn(vecCpuConfigs)), + MaxPoolingV8LayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg4D()), + ::testing::ValuesIn(inputShapes4D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg4D_RefOnly), + ::testing::ValuesIn(inputShapes4D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::Values(expectedCpuConfig()), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_Large, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg4D_Large()), + ::testing::ValuesIn(inputShapes4D_Large()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +const std::vector paramsMaxV85D_ref = { + LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER }, + LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, + LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 3, 4}, {2, 2, 2}, {2, 1, 1}, {1, 1, 1}, {1, 2, 2}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, +}; + +const std::vector paramsAvg5D_RefOnly = { + LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, + expectedAvgRoundingType(), ngraph::op::PadType::EXPLICIT, false }, +}; + +//FIXME: 5D cases are temporarly disabled on ARM because ACL support check in Pooling::getSupportedDescriptors() can't check layout +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_5D, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMax5D()), + ::testing::ValuesIn(inputShapes5D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_5D, MaxPoolingV8LayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMaxV85D()), + ::testing::ValuesIn(inputShapes5D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::ValuesIn(vecCpuConfigs)), + MaxPoolingV8LayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_5D_ref, MaxPoolingV8LayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMaxV85D_ref), + ::testing::ValuesIn(inputShapes5D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(expectedCpuConfig())), + MaxPoolingV8LayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg5D()), + ::testing::ValuesIn(inputShapes5D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::ValuesIn(vecCpuConfigs), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg5D_RefOnly), + ::testing::ValuesIn(inputShapes5D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(false), + ::testing::Values(expectedCpuConfig()), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); +#endif +} // namespace Pooling +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp new file mode 100644 index 00000000000000..f319fb6ada2719 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/random_uniform.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace RandomUniform { + +static const std::vector shape_prc = { + ElementType::i32, + ElementType::i64 +}; + +static const std::vector output_shapes = { + {500}, + {4, 3, 210} +}; + +static const std::vector global_seed = { + 0, 8 +}; + +static const std::vector operational_seed = { + 0, 3, 5 +}; + +static const std::vector> min_max = { + {0, 50}, + {-50, 50}, + {-50, 0} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Param, RandomUniformLayerTestCPU, + ::testing::Combine( + ::testing::ValuesIn(output_shapes), + ::testing::ValuesIn(min_max), + ::testing::ValuesIn(shape_prc), + ::testing::Values(ElementType::f32, ElementType::i32), + ::testing::ValuesIn(global_seed), + ::testing::ValuesIn(operational_seed), + ::testing::Values(false), + ::testing::Values(false), + ::testing::Values(false), + ::testing::Values(emptyCPUSpec), + ::testing::Values(empty_plugin_config)), + RandomUniformLayerTestCPU::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ParamConst, RandomUniformLayerTestCPU, + ::testing::Combine( + ::testing::Values(output_shapes[0]), + ::testing::Values(min_max[0]), + ::testing::Values(ElementType::i32), + ::testing::Values(ElementType::f32), + ::testing::Values(1), + ::testing::Values(0), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(emptyCPUSpec), + ::testing::Values(empty_plugin_config)), + RandomUniformLayerTestCPU::getTestCaseName); + +} // namespace RandomUniform +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp new file mode 100644 index 00000000000000..89331ea284d49a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp @@ -0,0 +1,148 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/pooling.hpp" +#include "shared_test_classes/single_layer/pooling.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" +#include +#include + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + + +namespace CPULayerTestsDefinitions { +namespace Pooling { +namespace { + +const auto ref = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"}; +const auto avx512 = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"}; +const auto avx = CPUSpecificParams{{}, {}, {"jit_avx"}, "jit_avx"}; +const auto sse42 = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"}; + +const std::vector vecCpuConfigs = {sse42, avx, avx512}; + +const std::vector paramsMaxV84D_ref = { + LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {2, 2}, {0, 0}, {0, 0}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER }, + LayerTestsDefinitions::maxPoolV8SpecificParams{ {4, 2}, {2, 2}, {1, 2}, {0, 0}, {0, 0}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, + LayerTestsDefinitions::maxPoolV8SpecificParams{ {4, 2}, {2, 1}, {2, 2}, {0, 0}, {0, 0}, + ngraph::element::Type_t::i32, 0, + ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_4D_ref, MaxPoolingV8LayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsMaxV84D_ref), + ::testing::ValuesIn(inputShapes4D()), + ::testing::ValuesIn((inpOutPrecision())), + ::testing::Values(ref)), + MaxPoolingV8LayerCPUTest::getTestCaseName); + +const auto avx512_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"}; +const auto avx512_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"}; + +const auto avx2_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"}; +const auto avx2_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"}; + +const auto sse42_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"}; +const auto sse42_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"}; + +const std::vector vecCpuConfigsFusing_4D = {sse42_nhwc, avx2_nhwc, avx512_nhwc}; +const std::vector vecCpuConfigsFusing_5D = {sse42_ndhwc, avx2_ndhwc, avx512_ndhwc}; + +std::vector fusingParamsSet { + emptyFusingSpec, + fusingFakeQuantizePerTensor, + fusingFakeQuantizePerChannel, +}; + +const std::vector inputShapes4D_int8 = { + { {}, {{3, 4, 64, 64}} }, + { {}, {{2, 8, 8, 12}} }, + { {}, {{1, 16, 16, 12}} }, + { {}, {{1, 21, 8, 4}} }, + { {}, {{1, 32, 8, 8}} }, + { + // dynamic + {-1, 32, -1, -1}, + // target + { + {1, 32, 8, 8}, + {1, 32, 8, 4}, + {2, 32, 8, 12}, + {1, 32, 8, 8} + } + }, + { + // dynamic + {{1, 5}, 16, {1, 64}, {1, 64}}, + // target + { + {3, 16, 32, 32}, + {1, 16, 16, 12}, + {1, 16, 8, 8}, + {3, 16, 32, 32}, + } + } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D_I8, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg4D()), + ::testing::ValuesIn(inputShapes4D_int8), + ::testing::Values(ElementType::f32), + ::testing::Values(true), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_4D)), + ::testing::ValuesIn(fusingParamsSet)), + PoolingLayerCPUTest::getTestCaseName); + +const std::vector inputShapes5D_int8 = { + { {}, {{1, 4, 16, 16, 16}} }, + { {}, {{2, 8, 8, 8, 8}} }, + { {}, {{2, 16, 12, 16, 20}} }, + { {}, {{1, 19, 16, 20, 8}} }, + { {}, {{1, 32, 16, 8, 12}} }, + { + // dynamic + {-1, 32, -1, -1, -1}, + // target + { + {2, 32, 8, 8, 8}, + {1, 32, 16, 20, 8}, + {1, 32, 16, 16, 16}, + {2, 32, 8, 8, 8} + } + }, + { + // dynamic + {{1, 5}, 16, {1, 64}, {1, 64}, {1, 25}}, + // target + { + {1, 16, 16, 16, 16}, + {1, 16, 16, 8, 12}, + {2, 16, 8, 8, 8}, + {1, 16, 16, 16, 16}, + } + } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_5D_I8, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::ValuesIn(paramsAvg5D()), + ::testing::ValuesIn(inputShapes5D_int8), + ::testing::Values(ElementType::f32), + ::testing::Values(true), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_5D)), + ::testing::ValuesIn(fusingParamsSet)), + PoolingLayerCPUTest::getTestCaseName); +} // namespace +} // namespace Pooling +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp new file mode 100644 index 00000000000000..8fec42f382464d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/random_uniform.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace RandomUniform { + +static const std::vector output_prc_nightly = { + ElementType::f32, + ElementType::f16, + ElementType::bf16, + ElementType::i32, + ElementType::i64 +}; + +// Need to validate the Kernel corner cases. +static const std::vector output_shapes_nightly = { + {1}, {2}, {3}, {2, 2}, {5}, {2, 3}, {7}, {2, 2, 2}, {3, 3}, {2, 5}, {11}, {2, 3, 2}, {13}, {2, 7}, {3, 5}, + {4, 4}, {1, 17}, {2, 9}, {19}, {4, 5}, {21}, {11, 2}, {23, 1}, {4, 2, 3}, {5, 5}, {26}, {1, 27}, {14, 2}, + {29}, {10, 3}, {31}, {2, 8, 2}, {33}, {17, 2}, {5, 7}, {2, 3, 2, 3}, {37}, {2, 19}, {2, 20}, {41}, {42}, + {43}, {22, 2}, {3, 5, 3}, {5, 2, 5}, {1, 3, 1, 17, 1}, {26, 2}, {53}, {54}, {55}, {56}, {57}, {58}, {59}, + {2, 32}, {99}, {127}, {128}, {129}, {199}, {255}, {499}, {997}, {1753}, {2899} +}; + +INSTANTIATE_TEST_SUITE_P(nightly_Param, RandomUniformLayerTestCPU, + ::testing::Combine( + ::testing::ValuesIn(output_shapes_nightly), + ::testing::Values(std::tuple{-31, 17}), + ::testing::Values(ElementType::i32), + ::testing::ValuesIn(output_prc_nightly), + ::testing::Values(3), + ::testing::Values(1), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(emptyCPUSpec), + ::testing::Values(empty_plugin_config)), + RandomUniformLayerTestCPU::getTestCaseName); + +} // namespace RandomUniform +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/pooling.cpp deleted file mode 100644 index c6a76f7fee9fad..00000000000000 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/pooling.cpp +++ /dev/null @@ -1,704 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ov_models/builders.hpp" -#include "test_utils/cpu_test_utils.hpp" -#include "test_utils/fusing_test_utils.hpp" -#include "shared_test_classes/single_layer/pooling.hpp" -#include "shared_test_classes/base/ov_subgraph.hpp" - -using namespace ov::test; -using namespace CPUTestUtils; - -namespace CPULayerTestsDefinitions { - -using poolLayerCpuTestParamsSet = std::tuple; - -using maxPoolV8LayerCpuTestParamsSet = std::tuple; - -class PoolingLayerCPUTest : public testing::WithParamInterface, - virtual public SubgraphBaseTest, public CpuTestWithFusing { -public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - LayerTestsDefinitions::poolSpecificParams basicParamsSet; - InputShape inputShapes; - ElementType inPrc; - bool isInt8; - CPUSpecificParams cpuParams; - fusingSpecificParams fusingParams; - std::tie(basicParamsSet, inputShapes, inPrc, isInt8, cpuParams, fusingParams) = obj.param; - - ngraph::helpers::PoolingTypes poolType; - std::vector kernel, stride; - std::vector padBegin, padEnd; - ngraph::op::PadType padType; - ngraph::op::RoundingType roundingType; - bool excludePad; - std::tie(poolType, kernel, stride, padBegin, padEnd, roundingType, padType, excludePad) = basicParamsSet; - - std::ostringstream results; - results << "IS=("; - results << ov::test::utils::partialShape2str({inputShapes.first}) << ")_"; - results << "TS="; - for (const auto& shape : inputShapes.second) { - results << ov::test::utils::vec2str(shape) << "_"; - } - results << "Prc=" << inPrc << "_"; - switch (poolType) { - case ngraph::helpers::PoolingTypes::MAX: - results << "MaxPool_"; - break; - case ngraph::helpers::PoolingTypes::AVG: - results << "AvgPool_"; - results << "ExcludePad=" << excludePad << "_"; - break; - } - results << "K" << ov::test::utils::vec2str(kernel) << "_"; - results << "S" << ov::test::utils::vec2str(stride) << "_"; - results << "PB" << ov::test::utils::vec2str(padBegin) << "_"; - results << "PE" << ov::test::utils::vec2str(padEnd) << "_"; - results << "Rounding=" << roundingType << "_"; - results << "AutoPad=" << padType << "_"; - results << "INT8=" << isInt8 << "_"; - - results << CPUTestsBase::getTestCaseName(cpuParams); - results << CpuTestWithFusing::getTestCaseName(fusingParams); - return results.str(); - } - -protected: - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_CPU; - - LayerTestsDefinitions::poolSpecificParams basicParamsSet; - InputShape inputShapes; - ElementType inPrc; - bool isInt8; - CPUSpecificParams cpuParams; - fusingSpecificParams fusingParams; - std::tie(basicParamsSet, inputShapes, inPrc, isInt8, cpuParams, fusingParams) = this->GetParam(); - - ngraph::helpers::PoolingTypes poolType; - std::vector kernel, stride; - std::vector padBegin, padEnd; - ngraph::op::PadType padType; - ngraph::op::RoundingType roundingType; - bool excludePad; - std::tie(poolType, kernel, stride, padBegin, padEnd, roundingType, padType, excludePad) = basicParamsSet; - - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - std::tie(postOpMgrPtr, fusedOps) = fusingParams; - - if (selectedType.empty()) { - selectedType = getPrimitiveType(); - } - if (isInt8) - selectedType = selectedType + "_I8"; - else - selectedType = makeSelectedTypeStr(selectedType, inPrc); - - init_input_shapes({inputShapes}); - - ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { - params.push_back(std::make_shared(inPrc, shape)); - } - std::shared_ptr poolInput = params[0]; - if (isInt8) { - ov::Shape newShape(poolInput->get_output_partial_shape(0).size(), 1); - poolInput = ngraph::builder::makeFakeQuantize(poolInput, inPrc, 256, newShape); - } - - std::shared_ptr pooling = ngraph::builder::makePooling(poolInput, - stride, - padBegin, - padEnd, - kernel, - roundingType, - padType, - excludePad, - poolType); - - function = makeNgraphFunction(inPrc, params, pooling, "PoolingCPU"); - } -}; - -class MaxPoolingV8LayerCPUTest : public testing::WithParamInterface, - virtual public SubgraphBaseTest, public CPUTestsBase { -public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - LayerTestsDefinitions::maxPoolV8SpecificParams basicParamsSet; - InputShape inputShapes; - ElementType inPrc; - CPUSpecificParams cpuParams; - std::tie(basicParamsSet, inputShapes, inPrc, cpuParams) = obj.param; - - std::vector kernel, stride, dilation; - std::vector padBegin, padEnd; - ngraph::op::PadType padType; - ngraph::op::RoundingType roundingType; - ngraph::element::Type indexElementType; - int64_t axis; - std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = basicParamsSet; - - std::ostringstream results; - results << "IS=("; - results << ov::test::utils::partialShape2str({inputShapes.first}) << ")_"; - results << "TS="; - for (const auto& shape : inputShapes.second) { - results << ov::test::utils::vec2str(shape) << "_"; - } - results << "Prc=" << inPrc << "_"; - results << "MaxPool_"; - results << "K" << ov::test::utils::vec2str(kernel) << "_"; - results << "S" << ov::test::utils::vec2str(stride) << "_"; - results << "D" << ov::test::utils::vec2str(dilation) << "_"; - results << "PB" << ov::test::utils::vec2str(padBegin) << "_"; - results << "PE" << ov::test::utils::vec2str(padEnd) << "_"; - results << "Rounding=" << roundingType << "_"; - results << "AutoPad=" << padType << "_"; - - results << CPUTestsBase::getTestCaseName(cpuParams); - return results.str(); - } - -protected: - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_CPU; - - LayerTestsDefinitions::maxPoolV8SpecificParams basicParamsSet; - InputShape inputShapes; - ElementType inPrc; - CPUSpecificParams cpuParams; - std::tie(basicParamsSet, inputShapes, inPrc, cpuParams) = this->GetParam(); - - std::vector kernel, stride, dilation; - std::vector padBegin, padEnd; - ngraph::op::PadType padType; - ngraph::op::RoundingType roundingType; - ngraph::element::Type indexElementType; - int64_t axis; - std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = basicParamsSet; - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - if (selectedType.empty()) { - selectedType = getPrimitiveType(); - } - selectedType = makeSelectedTypeStr(selectedType, inPrc); - - init_input_shapes({inputShapes}); - - ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { - params.push_back(std::make_shared(inPrc, shape)); - } - std::shared_ptr pooling = ngraph::builder::makeMaxPoolingV8(params[0], stride, dilation, padBegin, padEnd, - kernel, roundingType, padType, - indexElementType, axis); - pooling->get_rt_info() = getCPUInfo(); - ngraph::ResultVector results{std::make_shared(pooling->output(0))}; - function = std::make_shared(results, params, "MaxPooling"); - } -}; - -TEST_P(PoolingLayerCPUTest, CompareWithRefs) { - run(); - CheckPluginRelatedResults(compiledModel, "Pooling"); -} - -TEST_P(MaxPoolingV8LayerCPUTest, CompareWithRefs) { - run(); - CheckPluginRelatedResults(compiledModel, "Pooling"); -} - -namespace { - -const auto avx512 = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"}; -const auto avx = CPUSpecificParams{{}, {}, {"jit_avx"}, "jit_avx"}; -const auto sse42 = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"}; -const auto ref = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"}; - -const std::vector vecCpuConfigs = {ref, sse42, avx, avx512}; -const std::vector inpOutPrecision = {ElementType::f32/*, ElementType::bf16*/}; - -const std::vector inputShapes3D = { - { {}, {{3, 4, 64}} }, - { {}, {{2, 8, 12}} }, - { {}, {{1, 16, 12}} }, - { {}, {{1, 21, 4}} }, - { {}, {{1, 32, 8}} }, - { - // dynamic - {-1, -1, -1}, - // target - { - {1, 32, 8}, - {1, 21, 4}, - {2, 8, 12} - } - }, - { - // dynamic - {{1, 5}, {4, 32}, {1, 64}}, - // target - { - {3, 4, 64}, - {1, 16, 12}, - {1, 32, 8} - } - } -}; - -const std::vector inputShapes4D = { - { {}, {{3, 4, 64, 64}} }, - { {}, {{2, 8, 8, 12}} }, - { {}, {{1, 16, 16, 12}} }, - { {}, {{1, 21, 8, 4}} }, - { {}, {{1, 32, 8, 8}} }, - { - // dynamic - {-1, -1, -1, -1}, - // target - { - {1, 32, 8, 8}, - {1, 21, 8, 4}, - {2, 8, 8, 12}, - {1, 96, 125, 125} - } - }, - { - // dynamic - {{1, 5}, {4, 32}, {1, 64}, {1, 64}}, - // target - { - {3, 4, 64, 64}, - {1, 16, 16, 12}, - {1, 32, 8, 8} - } - }, - { - // dynamic - {{1, 10}, 16, 8, 8}, - // target - { - {1, 16, 8, 8}, - {2, 16, 8, 8}, - } - } -}; - -const std::vector inputShapes5D = { - { {}, {{1, 4, 16, 16, 16}} }, - { {}, {{2, 8, 8, 8, 8}} }, - { {}, {{2, 16, 12, 16, 20}} }, - { {}, {{1, 19, 16, 20, 8}} }, - { {}, {{1, 32, 16, 8, 12}} }, - { - // dynamic - {-1, -1, -1, -1, -1}, - // target - { - {2, 8, 8, 8, 8}, - {1, 19, 16, 20, 8}, - {1, 4, 16, 16, 16} - } - }, - { - // dynamic - {{1, 5}, {4, 32}, {1, 64}, {1, 64}, {1, 25}}, - // target - { - {1, 4, 16, 16, 16}, - {1, 32, 16, 8, 12}, - {3, 16, 4, 8, 3} - } - } -}; - -/* ============= Pooling (1D) ============= */ -const std::vector paramsMax3D = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {2}, {0}, {0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4}, {2}, {0}, {0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {1}, {0}, {0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, -}; - -const std::vector paramsAvg3D = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4}, {4}, {2}, {2}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, -}; - -const std::vector paramsAvg3D_RefOnly = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2}, {2}, {2}, {2}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_3D, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMax3D), - ::testing::ValuesIn(inputShapes3D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg3D), - ::testing::ValuesIn(inputShapes3D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D_NotOptimized, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg3D_RefOnly), - ::testing::ValuesIn(inputShapes3D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::Values(ref), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -/* ============= Pooling (2D) ============= */ -const std::vector paramsMax4D = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4, 2}, {2, 2}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4, 2}, {2, 1}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, -}; - -const std::vector paramsMaxV84D = { - LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {1, 1}, {0, 0}, {0, 0}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER }, -}; - -const std::vector paramsMaxV84D_ref = { - LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {2, 2}, {0, 0}, {0, 0}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER }, - LayerTestsDefinitions::maxPoolV8SpecificParams{ {4, 2}, {2, 2}, {1, 2}, {0, 0}, {0, 0}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, - LayerTestsDefinitions::maxPoolV8SpecificParams{ {4, 2}, {2, 1}, {2, 2}, {0, 0}, {0, 0}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, -}; - -const std::vector paramsAvg4D = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {1, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4, 4}, {4, 4}, {2, 2}, {2, 2}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, -}; - -const std::vector paramsAvg4D_RefOnly = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_4D, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMax4D), - ::testing::ValuesIn(inputShapes4D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_4D, MaxPoolingV8LayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMaxV84D), - ::testing::ValuesIn(inputShapes4D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), - MaxPoolingV8LayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_4D_ref, MaxPoolingV8LayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMaxV84D_ref), - ::testing::ValuesIn(inputShapes4D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(ref)), - MaxPoolingV8LayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg4D), - ::testing::ValuesIn(inputShapes4D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg4D_RefOnly), - ::testing::ValuesIn(inputShapes4D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::Values(ref), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -const std::vector paramsAvg4D_Large = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {65, 65}, {65, 65}, {0, 0}, {0, 0}, - ngraph::op::RoundingType::FLOOR, ngraph::op::PadType::VALID, true }, -}; - -const std::vector inputShapes4D_Large = { - { - // dynamic - {-1, -1, -1, -1}, - // target - { - {1, 16, 65, 65}, - {1, 8, 130, 130}, - {1, 16, 65, 65} - } - }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_Large, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg4D_Large), - ::testing::ValuesIn(inputShapes4D_Large), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -/* ============= Pooling (3D) ============= */ -const std::vector paramsMax5D = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, -}; - -const std::vector paramsMaxV85D = { - LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER }, -}; - -const std::vector paramsMaxV85D_ref = { - LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER }, - LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, - LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 3, 4}, {2, 2, 2}, {2, 1, 1}, {1, 1, 1}, {1, 2, 2}, - ngraph::element::Type_t::i32, 0, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT }, -}; - -const std::vector paramsAvg5D = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {1, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3, 3, 3}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4, 4, 4}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true }, -}; - -const std::vector paramsAvg5D_RefOnly = { - LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, - ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_5D, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMax5D), - ::testing::ValuesIn(inputShapes5D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_5D, MaxPoolingV8LayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMaxV85D), - ::testing::ValuesIn(inputShapes5D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), - MaxPoolingV8LayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV8_CPU_5D_ref, MaxPoolingV8LayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsMaxV85D_ref), - ::testing::ValuesIn(inputShapes5D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(ref)), - MaxPoolingV8LayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg5D), - ::testing::ValuesIn(inputShapes5D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg5D_RefOnly), - ::testing::ValuesIn(inputShapes5D), - ::testing::ValuesIn(inpOutPrecision), - ::testing::Values(false), - ::testing::Values(ref), - ::testing::Values(emptyFusingSpec)), - PoolingLayerCPUTest::getTestCaseName); - -/* === Fusing === */ - -const auto avx512_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"}; -const auto avx512_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"}; - -const auto avx2_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"}; -const auto avx2_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"}; - -const auto sse42_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"}; -const auto sse42_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"}; - -const std::vector vecCpuConfigsFusing_4D = {sse42_nhwc, avx2_nhwc, avx512_nhwc}; -const std::vector vecCpuConfigsFusing_5D = {sse42_ndhwc, avx2_ndhwc, avx512_ndhwc}; - -std::vector fusingParamsSet { - emptyFusingSpec, - fusingFakeQuantizePerTensor, - fusingFakeQuantizePerChannel, -}; - -const std::vector inputShapes4D_int8 = { - { {}, {{3, 4, 64, 64}} }, - { {}, {{2, 8, 8, 12}} }, - { {}, {{1, 16, 16, 12}} }, - { {}, {{1, 21, 8, 4}} }, - { {}, {{1, 32, 8, 8}} }, - { - // dynamic - {-1, 32, -1, -1}, - // target - { - {1, 32, 8, 8}, - {1, 32, 8, 4}, - {2, 32, 8, 12}, - {1, 32, 8, 8} - } - }, - { - // dynamic - {{1, 5}, 16, {1, 64}, {1, 64}}, - // target - { - {3, 16, 32, 32}, - {1, 16, 16, 12}, - {1, 16, 8, 8}, - {3, 16, 32, 32}, - } - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D_I8, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg4D), - ::testing::ValuesIn(inputShapes4D_int8), - ::testing::Values(ElementType::f32), - ::testing::Values(true), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_4D)), - ::testing::ValuesIn(fusingParamsSet)), - PoolingLayerCPUTest::getTestCaseName); - -const std::vector inputShapes5D_int8 = { - { {}, {{1, 4, 16, 16, 16}} }, - { {}, {{2, 8, 8, 8, 8}} }, - { {}, {{2, 16, 12, 16, 20}} }, - { {}, {{1, 19, 16, 20, 8}} }, - { {}, {{1, 32, 16, 8, 12}} }, - { - // dynamic - {-1, 32, -1, -1, -1}, - // target - { - {2, 32, 8, 8, 8}, - {1, 32, 16, 20, 8}, - {1, 32, 16, 16, 16}, - {2, 32, 8, 8, 8} - } - }, - { - // dynamic - {{1, 5}, 16, {1, 64}, {1, 64}, {1, 25}}, - // target - { - {1, 16, 16, 16, 16}, - {1, 16, 16, 8, 12}, - {2, 16, 8, 8, 8}, - {1, 16, 16, 16, 16}, - } - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_5D_I8, PoolingLayerCPUTest, - ::testing::Combine( - ::testing::ValuesIn(paramsAvg5D), - ::testing::ValuesIn(inputShapes5D_int8), - ::testing::Values(ElementType::f32), - ::testing::Values(true), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_5D)), - ::testing::ValuesIn(fusingParamsSet)), - PoolingLayerCPUTest::getTestCaseName); - -} // namespace - -} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp index e29ed1f4bfce94..b107b406cd833a 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp @@ -14,18 +14,26 @@ using namespace ov::test; namespace SubgraphTestsDefinitions { /* - * Subtract_const(U8/NF4) + * WP - weights precision + * DP - decompression precision + * IP - input precision + * Opt - optional + * Subtract_const(WP) * / - * Weights(U8/NF4) Convert(F32) + * Weights(WP) Convert(DP) * | / - * Convert(F32) Reshape - * \ / Multiply_const(F32) - * Subtract(opt) / - * \ Reshape - * \ / - * Multiply + * Convert(DP) Reshape (Opt) + * \ / Multiply_const(DP) + * Subtract(Opt) / + * \ Reshape (Opt) + * \ / + * Multiply * | - * Data(F32) Transpose(opt) + * Reshape (in case of group decompression) + * | + * Convert (if IP != DP) + * | + * Data(IP) Transpose(Opt) * \ / * Matmul * | @@ -46,6 +54,7 @@ struct ShapeParams { }; using MatmulWeightsDecompressionParams = std::tuple obj) { ShapeParams shape_params; ov::test::ElementType weights_precision; + ov::test::ElementType decompression_precision; bool transpose; bool decompression_sub; bool reshape_on_decompression; @@ -69,6 +79,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_precision, transformed_weights_shape, {}, true); weights->set_friendly_name("Compressed_weights"); - auto weights_convert = std::make_shared(weights, data_precision); + auto weights_convert = std::make_shared(weights, decompression_precision); std::shared_ptr mul_parent = weights_convert; auto output_channels = *weights_shape.rbegin(); @@ -152,7 +165,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_precision, scaleshift_const_shape, {}, true); - std::shared_ptr shift_convert = std::make_shared(shift_const, data_precision); + std::shared_ptr shift_convert = std::make_shared(shift_const, decompression_precision); if (reshape_on_decompression_constant) { auto shift_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto shift_reshape = std::make_shared(shift_convert, shift_reshape_const, false); @@ -161,7 +174,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_convert, shift_convert); } - std::shared_ptr scale_const = ngraph::builder::makeConstant(data_precision, scaleshift_const_shape, {}, true); + std::shared_ptr scale_const = ngraph::builder::makeConstant(decompression_precision, scaleshift_const_shape, {}, true); if (reshape_on_decompression_constant) { auto scale_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto scale_reshape = std::make_shared(scale_const, scale_reshape_const, false); @@ -175,6 +188,9 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(last_node, target_shape_node, false); } + if (decompression_precision != data_precision) { + last_node = std::make_shared(last_node, data_precision); + } if (transpose_weights) { const size_t rank = last_node->get_output_partial_shape(0).size(); std::vector order(rank); @@ -191,6 +207,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(test_param); + const bool should_fuse = std::get<8>(test_param); const size_t expected_count = should_fuse ? 0 : 1; CheckNumberOfNodesWithType(compiledModel, "Convert", expected_count); CheckNumberOfNodesWithType(compiledModel, "Eltwise", expected_count); @@ -304,6 +325,7 @@ bool shouldUseDecompressionKernelBasic() { } const std::vector weights_precisions = {ov::element::u8, ov::element::nf4}; +const std::vector decompression_precisions = {ov::element::f32}; const std::vector input_shapes_basic = { {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, @@ -331,6 +353,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_basic), ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(decompression_precisions), ::testing::Values(true), ::testing::Values(true), ::testing::Values(true), @@ -343,6 +366,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_big, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_big), ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(decompression_precisions), ::testing::Values(true), ::testing::Values(true), ::testing::Values(true), @@ -364,11 +388,13 @@ const std::vector input_shapes_corner_cases_big = { const std::vector transpose_weights = {true, false}; const std::vector add_decompression_sub = {true, false}; const std::vector reshape_on_decompression = {true, false}; +const std::vector decompression_precisions_corner_cases = {ov::element::f16, ov::element::f32}; INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_basic), ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(decompression_precisions_corner_cases), ::testing::ValuesIn(transpose_weights), ::testing::ValuesIn(add_decompression_sub), ::testing::ValuesIn(reshape_on_decompression), @@ -381,6 +407,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_big, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_big), ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(decompression_precisions_corner_cases), ::testing::ValuesIn(transpose_weights), ::testing::ValuesIn(add_decompression_sub), ::testing::ValuesIn(reshape_on_decompression), diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp index d8deddfebe5d69..fff65f9e1c442f 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp @@ -170,6 +170,7 @@ class CPUTestsBase { // common parameters const auto emptyCPUSpec = CPUSpecificParams{{}, {}, {}, {}}; const std::map cpuEmptyPluginConfig; +const ov::AnyMap empty_plugin_config{}; const std::map cpuFP32PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } }; const std::map cpuBF16PluginConfig = diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/helpers.py b/src/plugins/intel_cpu/tools/commit_slider/utils/helpers.py index c30c5773467b4f..3adf6e65025af4 100644 --- a/src/plugins/intel_cpu/tools/commit_slider/utils/helpers.py +++ b/src/plugins/intel_cpu/tools/commit_slider/utils/helpers.py @@ -191,14 +191,10 @@ def runCommandList(commit, cfgData, enforceClean=False): ) proc = subprocess.Popen( formattedCmd, cwd=cwd, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT + stderr=subprocess.STDOUT, + encoding="utf-8", errors="replace" ) for line in proc.stdout: - # decode if line is byte-type - try: - line = line.decode("utf-8") - except (UnicodeDecodeError, AttributeError): - pass sys.stdout.write(line) commitLogger.info(line) if "catchMsg" in cmd: diff --git a/src/plugins/intel_gna/tests/deprecated/helpers/single_layer_common.hpp b/src/plugins/intel_gna/tests/deprecated/helpers/single_layer_common.hpp index 03cf9af92fbaa7..765846056930f8 100644 --- a/src/plugins/intel_gna/tests/deprecated/helpers/single_layer_common.hpp +++ b/src/plugins/intel_gna/tests/deprecated/helpers/single_layer_common.hpp @@ -20,10 +20,6 @@ # include # define REPLACE_WITH_STR(SRC, PATTERN, STR) SRC = std::regex_replace(SRC, std::regex(PATTERN), STR) # define FIND_STR(SRC, PATTERN) std::regex_search(SRC, std::regex(PATTERN)) -#elif defined USE_BOOST_RE -# include -# define REPLACE_WITH_STR(SRC, PATTERN, STR) SRC = boost::regex_replace(SRC, boost::regex(PATTERN), STR) -# define FIND_STR(SRC, PATTERN) boost::regex_search(SRC, boost::regex(PATTERN)) #else # error "Cannot implement regex" # define REPLACE_WITH_STR(SRC, PATTERN, STR) diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp deleted file mode 100644 index 299935865f1ca5..00000000000000 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "subgraph_tests/perm_conv_perm_concat.hpp" - -#include - -#include "common_test_utils/test_constants.hpp" -namespace { -std::vector> input_shapes{ - {1, 1, 7, 32}, - {1, 1, 8, 16}, -}; - -std::vector> kernel_shapes{ - {1, 3}, - {1, 5}, -}; - -std::vector output_channels{ - 32, - 64, -}; - -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, -}; - -std::vector> configs = {{{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}}, - {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}}}; -} // namespace - -namespace SubgraphTestsDefinitions { -INSTANTIATE_TEST_SUITE_P(smoke_basic, - PermConvPermConcat, - ::testing::Combine(::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GNA), - ::testing::ValuesIn(input_shapes), - ::testing::ValuesIn(kernel_shapes), - ::testing::ValuesIn(output_channels), - ::testing::ValuesIn(configs)), - PermConvPermConcat::getTestCaseName); -} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp deleted file mode 100644 index f702ba9b3ec5f7..00000000000000 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "subgraph_tests/reshape_squeeze_reshape_relu.hpp" - -#include - -#include "common_test_utils/test_constants.hpp" - -using namespace SubgraphTestsDefinitions; - -namespace { -std::vector inputs{ - {{1, 1, 3}, {0, 1}}, - {{1, 1, 3}, {0}}, - {{1, 1, 3}, {1}}, - {{1, 3, 1}, {0, 2}}, - {{1, 3, 1}, {0}}, - {{1, 3, 1}, {2}}, - {{3, 1, 1}, {1, 2}}, - {{3, 1, 1}, {1}}, - {{3, 1, 1}, {2}}, - {{4, 1, 3, 1}, {1, 3}}, - {{4, 1, 1, 3}, {1, 2}}, - {{1, 4, 1, 3}, {0, 2}}, - {{1, 3, 5, 2, 1}, {0, 4}}, - {{3, 1, 2, 4, 4, 3}, {1}}, - {{1, 1, 1, 1, 1, 3}, {0, 1, 2, 3, 4}}, - {{1, 1, 1, 1, 1, 3}, {1, 3}}, - {{1}, {0}}, -}; - -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, -}; - -const std::vector opTypes = {ngraph::helpers::SqueezeOpType::SQUEEZE, - ngraph::helpers::SqueezeOpType::UNSQUEEZE}; - -INSTANTIATE_TEST_SUITE_P(smoke_reshape_squeeze_reshape_relu, - ReshapeSqueezeReshapeRelu, - ::testing::Combine(::testing::ValuesIn(inputs), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GNA), - ::testing::ValuesIn(opTypes)), - ReshapeSqueezeReshapeRelu::getTestCaseName); -} // namespace diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp deleted file mode 100644 index 1d9cea34c783ac..00000000000000 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "subgraph_tests/split_conv_concat.hpp" - -#include - -#include "common_test_utils/test_constants.hpp" - -using namespace SubgraphTestsDefinitions; -const std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; - -std::vector> inputShapes = {{1, 32, 1, 130}, {1, 64, 1, 170}, {1, 32, 1, 1026}}; - -INSTANTIATE_TEST_SUITE_P(smoke_SplitConvConcat, - SplitConvConcat, - ::testing::Combine(::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputShapes), - ::testing::Values(ov::test::utils::DEVICE_GNA)), - SplitConvConcat::getTestCaseName); diff --git a/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md b/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md index eb548eb386abe4..1ad9039435fb56 100644 --- a/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md +++ b/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md @@ -20,7 +20,7 @@ calls the corresponding memory object wrapper for each allocation type: [gpu_buf ## Dump memory allocation history -The memory allocation history is being managed by the `engine`, which can be dumped by setting the environment variable `OV_GPU_Verbose=1` if OpenVino is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`. +The memory allocation history is being managed by the `engine`, which can be dumped by setting the environment variable `OV_GPU_Verbose=2` if OpenVINO is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`. ```cpp ... GPU_Debug: Allocate 58982400 bytes of usm_host allocation type (current=117969612; max=117969612) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 1474543428a7b7..ab5d6b5e0af140 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -283,6 +283,8 @@ struct network { size_t _weights_cache_capacity = 1; std::unordered_map _events; + // This map is used to temporarily hold events that will be deallocated later + std::unordered_map _old_events; output_chains_map _output_chains; std::unique_ptr _shape_predictor; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 1ae7ef4f76618e..17e62ca926397b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -257,6 +257,7 @@ REGISTER_FACTORY(v11, Interpolate); REGISTER_FACTORY(v11, TopK); // ------------------------------ Supported v12 ops ----------------------------- // +REGISTER_FACTORY(v12, GroupNormalization); REGISTER_FACTORY(v12, Pad); REGISTER_FACTORY(v12, ScatterElementsUpdate); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp deleted file mode 100644 index 877c2c707f1791..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/so_ptr.hpp" - -#include - -namespace ov { -namespace intel_gpu { - -class RemoteTensorImpl; -class RemoteContextImpl; - -class USMHostAllocator final { -private: - ov::SoPtr _usm_host_tensor = { nullptr, nullptr }; - std::shared_ptr _context = nullptr; - -public: - using Ptr = std::shared_ptr; - - explicit USMHostAllocator(std::shared_ptr context) : _context(context) { } - - /** - * @brief Allocates memory - * @param size The size in bytes to allocate - * @return Handle to the allocated resource - */ - void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)) noexcept; - /** - * @brief Releases handle and all associated memory resources which invalidates the handle. - * @return false if handle cannot be released, otherwise - true. - */ - bool deallocate(void* handle, const size_t bytes, size_t alignment = alignof(max_align_t)) noexcept; - - bool is_equal(const USMHostAllocator& other) const; -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp index 74a07bbcbf38bf..f7f72cc77a16a3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp @@ -15,6 +15,7 @@ #endif #include "openvino/runtime/iremote_tensor.hpp" +#include "intel_gpu/runtime/memory_caps.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/plugin/common_utils.hpp" @@ -56,6 +57,8 @@ class RemoteTensorImpl : public ov::IRemoteTensor { cldnn::memory::ptr get_memory() const; cldnn::memory::ptr get_original_memory() const; + void set_memory(cldnn::memory::ptr memory, size_t actual_size); + std::shared_ptr get_context() const; private: @@ -76,8 +79,11 @@ class RemoteTensorImpl : public ov::IRemoteTensor { size_t m_hash = 0; bool supports_caching() const; + void update_hash(); void update_strides(); - void init_properties(); + void update_properties(); + + static TensorType allocation_type_to_tensor_type(cldnn::allocation_type t); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp index 1fd6d035dd48af..3050846e2c2354 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp @@ -90,9 +90,7 @@ class SyncInferRequest : public ov::ISyncInferRequest { bool need_lockable_mem) const; std::shared_ptr reinterpret_device_tensor(std::shared_ptr tensor, const ov::Shape new_shape) const; std::shared_ptr create_host_tensor(const ov::PartialShape& port_shape, const ov::element::Type& port_element_type) const; - std::shared_ptr create_device_tensor(const ov::Shape& pshape, ov::element::Type element_type, - bool need_lockable_memory = false, void* mem_ptr = nullptr) const; - std::shared_ptr create_shared_device_tensor(const ov::Shape& pshape, ov::element::Type element_type, void* usm_host_mem) const; + std::shared_ptr create_device_tensor(const ov::PartialShape& pshape, ov::element::Type element_type, bool need_lockable_memory = false) const; void allocate_inputs(); void allocate_outputs(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp new file mode 100644 index 00000000000000..d410fa046651e5 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/itensor.hpp" + +#include + +namespace ov { +namespace intel_gpu { + +class RemoteContextImpl; +class RemoteTensorImpl; + +class USMHostTensor : public ov::ITensor { +public: + USMHostTensor(std::shared_ptr context, const element::Type element_type, const Shape& shape); + explicit USMHostTensor(std::shared_ptr tensor); + + ~USMHostTensor() override = default; + + void* data(const element::Type& element_type) const override; + const element::Type& get_element_type() const override; + + const Shape& get_shape() const override; + + const Strides& get_strides() const override; + + void set_shape(ov::Shape new_shape) override; + + void set_memory(std::shared_ptr tensor); + + std::shared_ptr get_impl() const; + +private: + std::shared_ptr m_impl; +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp index 5ad37c7dfa55bf..4301f7cc10d31a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/condition.hpp @@ -54,8 +54,8 @@ struct condition : public primitive_base { const std::vector& inputs, const branch& branch_true, const branch& branch_false, - const padding& output_padding = padding()) - : primitive_base(id, inputs, {output_padding}), + const size_t num_outputs = 1) + : primitive_base(id, inputs, {padding()}, {optional_data_type()}, num_outputs), branch_true(branch_true), branch_false(branch_false) {} diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/gather.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/gather.hpp index 2a7dad7fe4774d..cbc64a0e143ec2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/gather.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/gather.hpp @@ -21,6 +21,7 @@ struct gather : public primitive_base { /// @param dict Input dictionary primitive id. /// @param idx Input indexes primitive id. /// @param axis Gathering axis. + /// @param input_rank Input rank. /// @param output_shape Output shape. /// @param batch_dim Batch_dim /// @param support_neg_ind Support negative indexes @@ -28,18 +29,22 @@ struct gather : public primitive_base { const input_info& dict, const input_info& idx, const int64_t axis, + const int64_t input_rank, const ov::Shape& output_shape, const int64_t batch_dim = 0, const bool support_neg_ind = false, const padding& output_padding = padding()) : primitive_base(id, {dict, idx}, {output_padding}) , axis(axis) + , input_rank(input_rank) , output_shape(output_shape) , batch_dim(batch_dim) , support_neg_ind(support_neg_ind) {} /// @brief Gathering axis int64_t axis = 0; + /// @brief Gather input rank + int64_t input_rank; /// @brief Gather output shape ov::Shape output_shape; /// @brief Gathering batch_dim @@ -69,6 +74,7 @@ struct gather : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); ob << axis; + ob << input_rank; ob << output_shape; ob << batch_dim; ob << support_neg_ind; @@ -77,6 +83,7 @@ struct gather : public primitive_base { void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); ib >> axis; + ib >> input_rank; ib >> output_shape; ib >> batch_dim; ib >> support_neg_ind; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/group_normalization.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/group_normalization.hpp new file mode 100644 index 00000000000000..d8c7e385f4c9a7 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/group_normalization.hpp @@ -0,0 +1,73 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "primitive.hpp" + +namespace cldnn { + +/// @brief Performs the following transformation of the input tensor: +/// y = scale * (x - mean) / sqrt(variance + epsilon) + bias +/// The operation is applied per batch, per group of channels. +struct group_normalization : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(group_normalization) + + group_normalization() : primitive_base("", {}) {} + + /// @brief Constructs group_normalization primitive. + /// @param id This primitive id. + /// @param data The input tensor to be normalized. + /// @param scale Scale values tensor. + /// @param bias Bias values. + /// @param num_groups Number of groups the channel dimension will be divided into. + /// @param epsilon A value added to the variance which ensures that division by zero. + /// does not occur for any normalized element. + group_normalization(const primitive_id& id, + const input_info& data, + const input_info& scale, + const input_info& bias, + std::int64_t num_groups, + double epsilon, + const padding& output_padding = padding()) + : primitive_base(id, {data, scale, bias}, {output_padding}), num_groups{num_groups}, epsilon{epsilon} {} + + /// @brief Number of groups the channel dimension will be divided into + /// @details + /// Specifies the number of groups G that the channel dimension will be divided into. + std::int64_t num_groups{}; + + /// @brief A value added to the variance which ensures that division by zero. + /// @details + /// A very small value added to the variance for numerical stability. + /// Ensures that division by zero does not occur for any normalized element. + double epsilon{}; + + std::size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, num_groups); + return hash_combine(seed, epsilon); + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + + const auto& rhs_casted = downcast(rhs); + + return num_groups == rhs_casted.num_groups && epsilon == rhs_casted.epsilon; + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << num_groups; + ob << epsilon; + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> num_groups; + ib >> epsilon; + } +}; + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp index c27a88e1975f1c..282147cc9e9d3d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp @@ -7,8 +7,8 @@ #include #include "primitive.hpp" #include "intel_gpu/graph/topology.hpp" +#include "intel_gpu/graph/program.hpp" -#define DEFAULT_MAX_NUM_ITERATION 256 namespace cldnn { /// @@ -53,18 +53,35 @@ struct loop : public primitive_base { CLDNN_DECLARE_PRIMITIVE(loop) loop() : primitive_base("", {}), - max_iteration(0) {} + max_num_iterations(0) {} struct io_primitive_map { /// @brief Constructs a mapping from external input/output primitive to input/output primitive in body topology - /// + /// or a mapping from output of body topology to input of body topology for the next iteration. + /// @param external_id Primitive id of input of loop or output of body network. + /// @param internal_id Primitive id of input of body network. + /// @param axis Axis to iterate through. Negative value means the axis will not iterate through and start, end, stride arguments will be ignored. + /// @param start Index where the iteration starts from. Applies only when axis >=0. + /// @param end Index where iteration ends. Negative value means counting indexes from the end. Applies only when axis >=0. + /// @param stride Step of iteration. Negative value means backward iteration. Applies only when axis >=0. + io_primitive_map(primitive_id external_id, primitive_id internal_id, + int64_t axis = -1, int64_t start = 0, int64_t end = -1, int64_t stride = 1) : + external_id(external_id, 0), + internal_id(internal_id, 0), + axis(axis), + start(start), + end(end), + stride(stride) {} + + /// @brief Constructs a mapping from external input/output primitive to input/output primitive in body topology + /// or a mapping from output of body topology to input of body topology for the next iteration. /// @param external_id Primitive id of input of loop or output of body network. /// @param internal_id Primitive id of input of body network. /// @param axis Axis to iterate through. Negative value means the axis will not iterate through and start, end, stride arguments will be ignored. /// @param start Index where the iteration starts from. Applies only when axis >=0. /// @param end Index where iteration ends. Negative value means counting indexes from the end. Applies only when axis >=0. /// @param stride Step of iteration. Negative value means backward iteration. Applies only when axis >=0. - io_primitive_map(primitive_id external_id = "", primitive_id internal_id = "", + io_primitive_map(input_info external_id = input_info(), input_info internal_id = input_info(), int64_t axis = -1, int64_t start = 0, int64_t end = -1, int64_t stride = 1) : external_id(std::move(external_id)), internal_id(std::move(internal_id)), @@ -73,8 +90,8 @@ struct loop : public primitive_base { end(end), stride(stride) {} - primitive_id external_id; - primitive_id internal_id; + input_info external_id; + input_info internal_id; int64_t axis; int64_t start; int64_t end; @@ -125,68 +142,69 @@ struct loop : public primitive_base { /// /// @param id This primitive id. /// @param inputs Input data primitive ids. - /// @param body Topology to be recurrently executed. + /// @param body_program body program to be recurrently executed. /// @param trip_count_id Data primitive id in external topology specifying maximum number of iterations. /// Its data primitive should have 1 integer element. Negative value means infinite /// number of iteration. - /// @param initial_condition_id Data primitive id in external topology specifying initial execution + /// @param first_execution_condition_id Data primitive id in external topology specifying initial execution /// condition. Its data primitive should have 1 integer element. Zero means /// loop will not be executed, otherwise loop will be executed. /// @param num_iteration_id mutable_data primitive id to get the actual number of loop iterations. - /// @param current_iteration_id Optional data primitive id in the body network to specify current iteration. - /// If current_iteration_id is specified but body does not have data whose primitive - /// id is same as current_iteration_id, data primitive will be added in the body network. - /// @param condition_id Optional data primitive id in the body network to specify execution condition + /// @param body_current_iteration_id Optional data primitive id in the body network to specify current iteration. + /// If body_current_iteration_id is specified but body does not have data whose primitive + /// id is same as body_current_iteration_id, data primitive will be added in the body network. + /// @param body_execution_condition_id Optional data primitive id in the body network to specify execution condition /// for the next iteration. Its data primitive should have 1 integer element. Zero means - /// loop will not be executed, otherwise loop will be executed. If condition_id - /// is specified but body does not have data whose primitive id is same as condition_id, + /// loop will not be executed, otherwise loop will be executed. If body_execution_condition_id + /// is specified but body does not have data whose primitive id is same as body_execution_condition_id, /// data primitive will be added in the body network. /// @param primitive_map Rules to map input of loop or output of body topology to input of the body topology /// @param back_edges Output data primitive id. /// @param output_padding Optional padding for output from primitive. loop(const primitive_id& id, const std::vector& inputs, - const topology& body, + const program::ptr body_program, const primitive_id& trip_count_id, - const primitive_id& initial_condition_id, + const primitive_id& first_execution_condition_id, const primitive_id& num_iteration_id, const std::vector& input_primitive_maps, const std::vector& output_primitive_maps, const std::vector& back_edges, - int64_t max_iteration = -1, - const primitive_id& current_iteration_id = primitive_id(), - const primitive_id& condition_id = primitive_id(), - const padding& output_padding = padding()) - : primitive_base(id, inputs, {output_padding}), - body(body), + int64_t max_num_iterations = -1, + const primitive_id& body_current_iteration_id = primitive_id(), + const primitive_id& body_execution_condition_id = primitive_id(), + const size_t num_outputs = 1) + : primitive_base(id, inputs, {padding()}, {optional_data_type()}, num_outputs), + body_program(std::move(body_program)), trip_count_id(trip_count_id), - initial_execution_id(initial_condition_id), + first_execution_condition_id(first_execution_condition_id), num_iteration_id(num_iteration_id), - current_iteration_id(current_iteration_id), - condition_id(condition_id), + body_current_iteration_id(body_current_iteration_id), + body_execution_condition_id(body_execution_condition_id), input_primitive_maps(input_primitive_maps), output_primitive_maps(output_primitive_maps), back_edges(back_edges), - max_iteration(max_iteration) - {} + max_num_iterations(max_num_iterations) { + OPENVINO_ASSERT(inputs.front().pid == num_iteration_id, "first input of inputs should be num_iteration_id"); + } - /// @brief Topology to be recurrently executed. - topology body; + /// @brief Body program to be recurrently executed. + program::ptr body_program; /// @brief Data primitive id in external topology specifying maximum number of iterations. primitive_id trip_count_id; /// @brief Data primitive id in external topology specifying initial execution condition. - primitive_id initial_execution_id; + primitive_id first_execution_condition_id; /// @brief mutable_data primitive id to get the actual number of loop iterations. primitive_id num_iteration_id; /// @brief Data primitive id in the body network to store current iteration - primitive_id current_iteration_id; + primitive_id body_current_iteration_id; /// @brief Data primitive id in the body network to store execution condition - primitive_id condition_id; + primitive_id body_execution_condition_id; /// @brief Rules to map input or output data of loop layer onto input or output data of body topology. std::vector input_primitive_maps; @@ -195,7 +213,7 @@ struct loop : public primitive_base { /// @brief Rules to transfer data from body outputs at one iteration to body input at the next iteration. std::vector back_edges; - int64_t max_iteration; + int32_t max_num_iterations; size_t hash() const override { size_t seed = primitive::hash(); @@ -206,42 +224,43 @@ struct loop : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); ob << trip_count_id; - ob << initial_execution_id; + ob << first_execution_condition_id; ob << num_iteration_id; - ob << current_iteration_id; - ob << condition_id; + ob << body_current_iteration_id; + ob << body_execution_condition_id; ob << input_primitive_maps; ob << output_primitive_maps; ob << back_edges; - ob << max_iteration; + ob << max_num_iterations; } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); ib >> trip_count_id; - ib >> initial_execution_id; + ib >> first_execution_condition_id; ib >> num_iteration_id; - ib >> current_iteration_id; - ib >> condition_id; + ib >> body_current_iteration_id; + ib >> body_execution_condition_id; ib >> input_primitive_maps; ib >> output_primitive_maps; ib >> back_edges; - ib >> max_iteration; + ib >> max_num_iterations; } protected: std::vector> get_dependencies() const override { - std::vector> ret{ - std::ref(trip_count_id), std::ref(initial_execution_id), std::ref(num_iteration_id) - }; + std::vector> ret; + ret.push_back(std::ref(num_iteration_id)); + if (!trip_count_id.empty()) ret.push_back(std::ref(trip_count_id)); + if (!first_execution_condition_id.empty()) ret.push_back(std::ref(first_execution_condition_id)); + // add external_id in dependencies if not exist for (const auto& mapping : input_primitive_maps) { auto target = std::find_if(input.begin(), input.end(), - [&](const input_info& info) { - return info.pid == mapping.external_id; - }); + [&](const input_info& info) { + return info.pid == mapping.external_id.pid;}); if (target == input.end()) { - ret.push_back(std::ref(mapping.external_id)); + ret.push_back(std::ref(mapping.external_id.pid)); } } return ret; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp index bc55ed80e4f362..72c841a7578ab4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp @@ -70,8 +70,19 @@ struct input_info { ib >> pid; ib >> idx; } + + std::string to_string() const { + std::stringstream ss; + ss << "input_info(pid:" << pid << ",idx:" << idx << ")"; + return ss.str(); + } }; +static inline std::ostream& operator<< (std::ostream& os, input_info& info) { + os << info.to_string(); + return os; +} + struct prim_map_storage { static prim_map_storage& instance() { static prim_map_storage instance; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp index 9e38a8b99c7b5e..ea3e2aec0274d0 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp @@ -139,6 +139,7 @@ struct format { oyix, oxiy, os_iyx_osv16, ///< format used only for convolution weights + o_is_yx_isv2, ///< format used only for convolution weights o_is_yx_isv4, ///< format used only for convolution weights o_is_yx_isv16, ///< format used only for convolution weights o_is_zyx_isv16, ///< format used only for convolution weights diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 3dad0cea4e008b..679f4c51ea6881 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -13,10 +13,10 @@ #include #include #include -#include -#include -#include +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/core/type/element_type_traits.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/graph/serialization/vector_serializer.hpp" @@ -28,33 +28,9 @@ namespace cldnn { /// @addtogroup cpp_memory Memory description and management /// @{ -constexpr size_t float_type_mask = 0x80; -constexpr size_t uint_type_mask = 0x40; -constexpr size_t bin_type_mask = 0x20; - /// @brief Possible data types could be stored in memory. using data_types = ov::element::Type_t; -/// Converts @ref data_types to C++ type. -template -struct data_type_to_type; -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct data_type_to_type { typedef uint32_t type; }; -template <> -struct data_type_to_type { typedef uint8_t type; }; -template <> -struct data_type_to_type { typedef int8_t type; }; -template <> -struct data_type_to_type { typedef int32_t type; }; -template <> -struct data_type_to_type { typedef int64_t type; }; -template <> -struct data_type_to_type { typedef ov::float16 type; }; -template <> -struct data_type_to_type { typedef float type; }; -#endif - /// Helper class to identify key properties for data_types. struct data_type_traits { static size_t size_of(data_types data_type) { @@ -72,52 +48,27 @@ struct data_type_traits { return et.is_quantized() && et.bitwidth() == 8; } - static size_t align_of(data_types data_type) { - switch (data_type) { - case data_types::u1: - return alignof(data_type_to_type::type); - case data_types::i8: - return alignof(data_type_to_type::type); - case data_types::u8: - return alignof(data_type_to_type::type); - case data_types::i32: - return alignof(data_type_to_type::type); - case data_types::i64: - return alignof(data_type_to_type::type); - case data_types::f16: - return alignof(data_type_to_type::type); - case data_types::f32: - return alignof(data_type_to_type::type); - default: - return size_t(1); - } - } - - static std::string name(data_types data_type) { - return ov::element::Type(data_type).get_type_name(); - } + static ov::element::Type max_type(ov::element::Type t1, ov::element::Type t2) { + if (t1 == ov::element::u1) + return t2; - static data_types max_type(data_types dt1, data_types dt2) { - if (dt1 == data_types::u1) - return dt2; + if (t2 == ov::element::u1) + return t1; - if (dt2 == data_types::u1) - return dt1; + if (t1.bitwidth() < t2.bitwidth()) + return t2; - if (size_of(dt1) < size_of(dt2)) - return dt2; + if (t1.bitwidth() > t2.bitwidth()) + return t1; - if (size_of(dt1) > size_of(dt2)) - return dt1; + if (t2.is_real()) + return t2; - if (is_floating_point(dt2)) - return dt2; - - return dt1; + return t1; } - static bool is_quantized(data_types dt) { - return is_i8_u8(dt); + static bool is_quantized(ov::element::Type t) { + return t.is_quantized(); } template @@ -132,7 +83,7 @@ struct data_type_traits { case data_types::i64: return static_cast(std::numeric_limits::max()); case data_types::f16: - return static_cast(65504); + return static_cast(std::numeric_limits::max()); case data_types::f32: return static_cast(std::numeric_limits::max()); default: @@ -152,7 +103,7 @@ struct data_type_traits { case data_types::i64: return static_cast(std::numeric_limits::lowest()); case data_types::f16: - return static_cast(-65504); + return static_cast(std::numeric_limits::lowest()); case data_types::f32: return static_cast(std::numeric_limits::lowest()); default: @@ -170,44 +121,17 @@ inline data_types element_type_to_data_type(ov::element::Type t) { switch (t) { case ov::element::Type_t::i16: case ov::element::Type_t::u16: - case ov::element::Type_t::f32: case ov::element::Type_t::f64: return cldnn::data_types::f32; - case ov::element::Type_t::f16: - return cldnn::data_types::f16; - case ov::element::Type_t::u8: - return cldnn::data_types::u8; - case ov::element::Type_t::i8: - return cldnn::data_types::i8; - case ov::element::Type_t::i32: case ov::element::Type_t::u32: case ov::element::Type_t::u64: return cldnn::data_types::i32; - case ov::element::Type_t::i64: - return cldnn::data_types::i64; case ov::element::Type_t::boolean: return cldnn::data_types::u8; - case ov::element::Type_t::u1: - return cldnn::data_types::u1; - default: - throw std::runtime_error("Can't convert " + t.get_type_name() + " element type"); + default: return t; } } -/// Helper function to get both data_types and format::type in a single, unique value. Useable in 'case' statement. -constexpr auto fuse(data_types dt, cldnn::format::type fmt) -> decltype(static_cast::type>(dt) | - static_cast::type>(fmt)) { - using dt_type = std::underlying_type::type; - using fmt_type = std::underlying_type::type; - using fmt_narrow_type = int16_t; - - return static_cast(fmt) <= std::numeric_limits::max() && - static_cast(dt) <= (std::numeric_limits::max() >> (sizeof(fmt_narrow_type) * 8)) - ? (static_cast(dt) << (sizeof(fmt_narrow_type) * 8)) | - (static_cast(fmt) >= 0 ? static_cast(fmt) : static_cast(-1)) - : throw std::invalid_argument("data_type and/or format values are too big to be fused into single value"); -} - /// @brief Represents data padding information. struct padding { /// @brief Filling value for padding area. diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp index 2f2e614c29f2c6..51f09989502a13 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/shape_predictor.hpp @@ -50,7 +50,7 @@ struct ShapePredictor { /// says if shape is successfully predicted and can be preallocated, and the second element is ov::Shape itself. std::pair predict_preallocation_shape(const std::string& id, const ov::Shape& current_shape, - size_t dt_size, + size_t dt_bitwidth, bool can_reuse_buffer); bool can_preallocate(size_t desired_buffer_size); diff --git a/src/plugins/intel_gpu/src/graph/condition.cpp b/src/plugins/intel_gpu/src/graph/condition.cpp index 842495e0b24e0d..8da80347ea66fd 100644 --- a/src/plugins/intel_gpu/src/graph/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/condition.cpp @@ -215,7 +215,7 @@ std::string condition_inst::to_string(condition_node const& node) { } /* -Condition primitive is resuing memory with the input. +Condition primitive is reusing memory with the input. */ condition_inst::typed_primitive_inst(network& network, condition_node const& node) : parent(network, node), diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index 537fa7412b09f5..9c3eb7c813045d 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -14,6 +14,7 @@ #include "arg_max_min_inst.h" #include "fully_connected_inst.h" #include "condition_inst.h" +#include "loop_inst.h" #include "program_node.h" #include @@ -74,7 +75,7 @@ void compile_graph::run(program& p) { if (node->is_dynamic() && !is_planar) can_select_impl = false; - if (node->is_type() || node->is_type()) + if (node->is_type() || node->is_type() || node->is_type()) can_select_impl = true; if (can_select_impl) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp index 1f2016e8d6706e..de6d6c62859bd9 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp @@ -23,6 +23,8 @@ bool can_shuffle_features(program_node& node, stream& stream) { if (node.is_type()) { auto& conv_node = node.as(); auto& wei_node = conv_node.weights(); + if (ov::element::Type(wei_node.get_output_layout().data_type).bitwidth() < 8) + return false; return conv_node.get_groups() == 1 && conv_node.get_deformable_groups() == 1 && !conv_node.get_transposed() && @@ -32,6 +34,8 @@ bool can_shuffle_features(program_node& node, stream& stream) { if (node.is_type()) { auto& fc_node = node.as(); auto& wei_node = fc_node.weights(); + if (ov::element::Type(wei_node.get_output_layout().data_type).bitwidth() < 8) + return false; return wei_node.is_type() && wei_node.is_constant() && !wei_node.is_output(); } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp index 8a031e45582314..cf9f44a9a59686 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp @@ -402,6 +402,7 @@ void graph_initializations::set_outputs(program& p) { auto custom_outputs = p.get_config().get_property(ov::intel_gpu::custom_outputs); if (!custom_outputs.empty()) { for (auto const& output : custom_outputs) { + OPENVINO_ASSERT(p.has_node(output), "not found custom output node in current cldnn::program: ", output); auto o_node = p.get_node_ptr(output); o_node->set_output(true); p.outputs.push_back(o_node.get()); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp index d1b125aa8f1df5..f55d99b6a5fa80 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "fully_connected_inst.h" #include "pooling_inst.h" #include "quantize_inst.h" #include "reorder_inst.h" @@ -847,6 +848,42 @@ bool prepare_quantization::optimize_quantize(program &p, quantize_node& quantize return true; } +static void optimize_weights_decompression_parameters(fully_connected_node& fc_node, program& p) { + auto fc_prim = fc_node.get_primitive(); + if (!fc_prim->compressed_weights) + return; + + auto reorder_bfyx_to_fbyx = [&](size_t dep_id) { + auto& dep = fc_node.get_dependency(dep_id); + auto target_layout = dep.get_output_layout(); + target_layout.format = format::fbyx; + auto reorder_prim = std::make_shared(dep.id() + "_reorder", dep.id(), target_layout); + p.add_intermediate(reorder_prim, fc_node, dep_id, true); + fc_node.get_dependency(dep_id).recalc_output_layout(false); + }; + + auto need_reorder = [&](size_t dep_id) { + auto dep_layout = fc_node.get_input_layout(dep_id); + auto dep_pshape = dep_layout.get_partial_shape(); + + auto groups_count = dep_pshape[dep_pshape.size() - 1].get_length(); + + return groups_count > 1; + }; + + auto decompression_scale_idx = !fc_node.bias_term() ? 2 : 3; + if (need_reorder(decompression_scale_idx)) { + reorder_bfyx_to_fbyx(decompression_scale_idx); + } + + if (!fc_prim->decompression_zero_point.empty()) { + auto decompression_zp_idx = decompression_scale_idx + 1; + if (need_reorder(decompression_zp_idx)) { + reorder_bfyx_to_fbyx(decompression_zp_idx); + } + } +} + void prepare_quantization::run(program& p) { auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { @@ -859,6 +896,8 @@ void prepare_quantization::run(program& p) { remove_fake_reorders(p, node->as()); } else if (node->is_type()) { prepare_asymmetric_quantization(p, node->as()); + } else if (node->is_type()) { + optimize_weights_decompression_parameters(node->as(), p); } } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 094e645a69e05f..769134e440b848 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -121,15 +121,10 @@ struct travel_direction_wrapper { static format get_target_output_format(layout_optimizer& lo, const std::map& fmt_map, program_node *node, program_node *next) { auto user_idx = node->get_user_index(*next); - bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); // 1. Check selected preferred_output_format - if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) { - // If onednn is not used, need to ignore get_preferred_output_fmt result as it is from onednn - auto ret = node->get_preferred_output_fmt(user_idx); - - if (ret != format::any) - return ret; - } + auto ret = node->get_preferred_output_fmt(user_idx); + if (ret != format::any) + return ret; // 2. Check fmt if (fmt_map.count(node) > 0) @@ -142,14 +137,10 @@ static format get_target_output_format(layout_optimizer& lo, const std::map& fmt_map, program_node *node, program_node *prev) { auto dep_idx = node->get_dependency_index(*prev); - bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); // 1. Check selected preferred_input_format - if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) { - // If onednn is not used, need to ignore get_preferred_input_fmt result as it is from onednn - auto ret = node->get_preferred_input_fmt(dep_idx); - if (ret != format::any) - return ret; - } + auto ret = node->get_preferred_input_fmt(dep_idx); + if (ret != format::any) + return ret; // 2. Check fmt if (fmt_map.count(node) > 0) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index 05dacd336a43f5..8b2b3a118a501f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -32,11 +32,21 @@ void select_preferred_formats::run(program& p) { return; #ifdef ENABLE_ONEDNN_FOR_GPU + auto forcing_map = _lo.get_implementation_forcing(); + engine.create_onednn_engine(p.get_config()); for (auto n : p.get_processing_order()) { - if (n->is_input() || !_lo.are_data_types_suitable_for_onednn(*n)) { + if (n->is_input() || !layout_optimizer::is_node_suitable_for_onednn(*n)) { continue; } + + // skip to set preferred_formats if forcing_impl is not onednn. + if (std::find_if(forcing_map.begin(), forcing_map.end(), + [&n](std::map>::value_type const& it) { + return (it.first == n->id() && it.second.second != impl_types::onednn); + }) != forcing_map.end()) + continue; + // Onednn primitive descriptor creation may fail, for example, due to asymmetric weight. try { if (n->is_type()) { diff --git a/src/plugins/intel_gpu/src/graph/group_normalization.cpp b/src/plugins/intel_gpu/src/graph/group_normalization.cpp new file mode 100644 index 00000000000000..69b06343362570 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/group_normalization.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "group_normalization_inst.h" +#include "primitive_type_base.h" +#include "json_object.h" + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(group_normalization) + +layout group_normalization_inst::calc_output_layout(group_normalization_node const& node, kernel_impl_params const& impl_param) { + assert(static_cast(impl_param.desc->output_data_types[0]) == false && + "Output data type forcing is not supported for group_normalization_node!"); + auto output_layout = impl_param.get_input_layout(); + + if (impl_param.has_fused_primitives()) + output_layout.data_type = impl_param.get_fused_output_layout().data_type; + + return output_layout; +} + +std::string group_normalization_inst::to_string(group_normalization_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + + std::stringstream primitive_description; + + json_composite group_normalization_info; + group_normalization_info.add("dimension", desc->num_groups); + group_normalization_info.add("epsilon", desc->epsilon); + + node_info->add("group_normalization_info", group_normalization_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +group_normalization_inst::typed_primitive_inst(network& network, group_normalization_node const& node) : parent(network, node) { +} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index 81f244051a25aa..b774b72dd506ec 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -12,6 +12,77 @@ namespace cldnn { namespace common { + +// read scala value from data primitive +static int64_t read_scalar_value(memory::ptr mem, stream& stream) { + int64_t trip_count = 0; + const layout& prim_layout = mem->get_layout(); + + switch (prim_layout.data_type) { + case data_types::u8: { + mem_lock lock_prim_output{mem, stream}; + trip_count = *lock_prim_output.data(); + break; + } + case data_types::i8: { + mem_lock lock_prim_output{mem, stream}; + trip_count = *lock_prim_output.data(); + break; + } + case data_types::i32: { + mem_lock lock_prim_output{mem, stream}; + trip_count = *lock_prim_output.data(); + break; + } + case data_types::i64: { + mem_lock lock_prim_output{mem, stream}; + trip_count = *lock_prim_output.data(); + break; + } + default: + OPENVINO_THROW("Invalid data type : ", ov::element::Type(prim_layout.data_type).get_type_name()); + } + return trip_count; +} + +template +static inline void validate_input_value(int64_t input) { + OPENVINO_ASSERT((input >= std::numeric_limits::min() && input <= std::numeric_limits::max()), + "Invalid data value : ", input); +} + +static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) { + const layout& prim_layout = mem->get_layout(); + + switch (prim_layout.data_type) { + case data_types::u8: { + validate_input_value(input); + mem_lock lock_prim_output{mem, stream}; + lock_prim_output[0] = static_cast(input); + break; + } + case data_types::i8: { + validate_input_value(input); + mem_lock lock_prim_output{mem, stream}; + lock_prim_output[0] = static_cast(input); + break; + } + case data_types::i32: { + validate_input_value(input); + mem_lock lock_prim_output{mem, stream}; + lock_prim_output[0] = static_cast(input); + break; + } + case data_types::i64: { + mem_lock lock_prim_output{mem, stream}; + lock_prim_output[0] = input; + break; + } + default: + OPENVINO_THROW("Invalid data type : ", ov::element::Type(prim_layout.data_type).get_type_name()); + } +} + struct loop_impl : typed_primitive_impl { using parent = typed_primitive_impl; using parent::parent; @@ -27,7 +98,6 @@ struct loop_impl : typed_primitive_impl { loop_impl() : parent() {} loop_impl(const loop_impl& other) : typed_primitive_impl(other), - _max_iteration(other._max_iteration), _back_edges(other._back_edges) {} explicit loop_impl(const loop_node& node) { @@ -37,63 +107,169 @@ struct loop_impl : typed_primitive_impl { void set_node_params(const program_node& arg) override { OPENVINO_ASSERT(arg.is_type()); const auto& node = arg.as(); - _max_iteration = node.get_max_iteration(); _back_edges = node.get_back_edges(); } + void set_memory_in_body_network(cldnn::network::ptr body_network, + const std::shared_ptr& inst, memory::ptr mem) const { + if (inst->is_input()) { + body_network->set_input_data(inst->id(), mem); + } else if (inst->is_output()) { + body_network->set_output_memory(inst->id(), mem); + } else { + inst->set_output_memory(mem, false); + } + } + + std::vector handle_buffers_for_next_iteration(const loop_inst::backedge_memory_mapping& mapping, + network::ptr body_network, int64_t iter, bool is_dynamic) const { + std::vector event_vec; + OPENVINO_ASSERT(iter >= 0, "iteration should not be negative : ", iter); + if (mapping.type == loop_inst::backedge_memory_mapping::CONCAT_OUTPUT) { + if (iter == 0) { + set_memory_in_body_network(body_network, mapping.to_primitive, mapping.initial_mem); + } else if (iter > 0) { + if (is_dynamic) { + auto from_id = mapping.from_primitive->id(); + if (body_network->has_event(from_id)) { + auto ev = body_network->get_primitive_event(from_id); + if (ev) ev->wait(); + } + // In dynamic model, just copy data from inner body output to inner body input in back_edges. + memory::ptr mem1 = mapping.to_primitive->output_memory_ptr(); + memory::ptr mem2 = mapping.from_primitive->output_memory_ptr(); + auto ev = mem1->copy_from(body_network->get_stream(), *(mem2)); + if (ev) event_vec = {ev}; + } else { + auto mem = mapping.concat_mem_mapping->get_sliced_mems().at(iter - 1); + set_memory_in_body_network(body_network, mapping.to_primitive, mem); + } + } + } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE_SHARED) { + if (iter == 0) { + if (mapping.from_mem != nullptr) { + auto ev = mapping.from_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem)); + if (ev) event_vec = {ev}; + } + } else { + // In dynamic model, output memory is not defined before execution. + // After body network execution, replace input memory from initial_mem(external input memory) to output memory. + if (mapping.from_mem == nullptr) { + mapping.from_mem = mapping.from_primitive->output_memory_ptr(); + OPENVINO_ASSERT(mapping.from_mem != nullptr, "from_mem should not be null"); + set_memory_in_body_network(body_network, mapping.to_primitive, mapping.from_mem); + } + } + } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE) { + memory::ptr mem1 = mapping.to_primitive->output_memory_ptr(); + if (iter == 0) { + auto ev = mem1->copy_from(body_network->get_stream(), *(mapping.initial_mem)); + if (ev) event_vec = {ev}; + } else { + if (is_dynamic) { + // In dynamic model, do not set memory buffer between input and output in inner body network. + // Just copy data from input buffer memory to output buffer memory. + auto from_id = mapping.from_primitive->id(); + if (body_network->has_event(from_id)) { + auto ev = body_network->get_primitive_event(from_id); + if (ev) ev->wait(); + } + memory::ptr mem2 = mapping.from_primitive->output_memory_ptr(); + auto ev = mem1->copy_from(body_network->get_stream(), *(mem2)); + if (ev) event_vec = {ev}; + } else { + // In static model, swap memory buffer between output and input in inner body network + memory::ptr mem2 = mapping.from_primitive->output_memory_ptr(); + set_memory_in_body_network(body_network, mapping.to_primitive, std::move(mem2)); + set_memory_in_body_network(body_network, mapping.from_primitive, std::move(mem1)); + } + } + } + return event_vec; + } + event::ptr execute_impl(const std::vector& events, loop_inst& instance) override { - const auto& primitive = instance.get_typed_desc(); + const auto& impl_params = instance.get_impl_params(); + const auto& primitive = impl_params->typed_desc(); auto& outer_network = instance.get_network(); auto& stream = outer_network.get_stream(); + const auto max_num_iterations = primitive->max_num_iterations; auto body_network = instance.get_body_network(); + int64_t current_iteration_idx = 0; auto ev = stream.create_user_event(false); - if (!instance.preproc_memories_done) { - instance.preprocess_output_memory(); - instance.preprocess_input_memory(); - instance.preprocess_backedge_memory(); - - // set input data for current_iteration primitive if current_iteration is used - if (!primitive->current_iteration_id.empty()) { - auto current_iteration_prim = body_network->get_primitive(primitive->current_iteration_id); - auto input_layout_prim = std::dynamic_pointer_cast(current_iteration_prim); - if (input_layout_prim == nullptr) { - CLDNN_ERROR_MESSAGE(instance.id(), "current_iteration primitive is not input_layout"); - } else { - const auto& backedge_mapping = instance.get_current_iteration_backedge_mapping(); - input_layout_prim->set_data(backedge_mapping.initial_mem); - } - } - instance.preproc_memories_done = true; - } + OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation should have num_iteration_id"); + ////////////////////////////////////////// + // memory pointers for outer network + ////////////////////////////////////////// // read trip_count from outer network - bool update_num_iterations = false; - memory::ptr trip_count_mem = outer_network.get_primitive(primitive->trip_count_id)->output_memory_ptr(); - int64_t trip_count = loop_node::read_scalar_value(std::move(trip_count_mem), stream); - if (trip_count < 0) { - trip_count = _max_iteration; - update_num_iterations = true; + int64_t trip_count = -1; + if (!primitive->trip_count_id.empty()) { + memory::ptr trip_count_mem = outer_network.get_primitive(primitive->trip_count_id)->output_memory_ptr(); + trip_count = read_scalar_value(std::move(trip_count_mem), stream); + } else { + trip_count = max_num_iterations; } // read initial execution condition from outer network - memory::ptr initial_execution_mem = outer_network.get_primitive(primitive->initial_execution_id)->output_memory_ptr(); - int64_t execution_condition = loop_node::read_scalar_value(initial_execution_mem, stream); + int64_t execution_condition = 1; + if (!primitive->first_execution_condition_id.empty()) { + memory::ptr first_execution_condition_mem = outer_network.get_primitive(primitive->first_execution_condition_id)->output_memory_ptr(); + execution_condition = read_scalar_value(first_execution_condition_mem, stream); + } + + // When execution_condition is false or trip_count is zero, return execute_impl without any body_network execution. + if (!execution_condition || trip_count == 0) { + // Update num_iterations (actual number of iterations) + memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr(); + write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx); + + instance.update_output_layout(); + ev->set(); + return ev; + } + ////////////////////////////////////////// + // memory pointers for body network + ////////////////////////////////////////// // shortcut of execution_condition memory in body network - memory::ptr execution_condition_mem = nullptr; - if (!primitive->condition_id.empty()) { - execution_condition_mem = body_network->get_primitive(primitive->condition_id)->output_memory_ptr(); + memory::ptr body_execution_condition_mem = nullptr; + if (!primitive->body_execution_condition_id.empty()) { + body_execution_condition_mem = body_network->get_primitive(primitive->body_execution_condition_id)->output_memory_ptr(); + } + + // shortcut of current_iteration memory in body network + if (!primitive->body_current_iteration_id.empty()) { + memory::ptr body_current_iteration_mem = body_network->get_primitive(primitive->body_current_iteration_id)->output_memory_ptr(); + write_scalar_value(body_current_iteration_mem, body_network->get_stream(), 0); + } + + const auto is_dynamic = instance.is_dynamic(); + if (is_dynamic) { + instance.update_shape(); + if (instance.shape_changed()) { + instance.preproc_memories_done = false; + instance.reset_memory(); + } + } + + if (!instance.preproc_memories_done) { + instance.preprocess_output_memory(trip_count); + instance.preprocess_input_memory(trip_count); + instance.preprocess_backedge_memory(); + instance.preproc_memories_done = true; } const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings; const auto& concatenated_output_mem_mappings = instance.concatenated_output_mem_mappings; + const auto& backedge_memory_mappings = instance.backedge_memory_mappings; // If there are concatenated_output_mem_mappings or backedge_memory_mappings we need to wait for // previous tasks before accessing memory in get_sliced_mem() and setup_iteration() functions - if (!concatenated_input_mem_mappings.empty() || !instance.backedge_memory_mappings.empty()) { + if (!concatenated_input_mem_mappings.empty() || !backedge_memory_mappings.empty()) { for (auto& e : events) { e->wait(); } @@ -102,37 +278,36 @@ struct loop_impl : typed_primitive_impl { // Set sliced input data for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { const auto& concatenated_input = concatenated_input_mem_mappings.at(i); - memory::ptr mem = concatenated_input.get_sliced_mem(0); - if (mem) { - body_network->set_input_data(concatenated_input.sliced_data_prim->id(), mem); - } else { - CLDNN_ERROR_MESSAGE(instance.id(), "sliced input memory of loop is not allocated properly"); - } + memory::ptr mem = concatenated_input->get_sliced_mem(0); + OPENVINO_ASSERT(mem != nullptr, instance.id(), "sliced input memory of loop is not allocated properly"); + body_network->set_input_data(concatenated_input->sliced_data_prim->id(), mem); } std::vector all_events; std::vector loop_carried_dep(events.begin(), events.end()); - int64_t current_iteration_idx = 0; - while (current_iteration_idx < trip_count && execution_condition) { + while (((trip_count <= 0) || (current_iteration_idx < trip_count)) && execution_condition) { // Copy & Set sliced input memory for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { const auto& concatenated_input = concatenated_input_mem_mappings.at(i); - memory::ptr mem = concatenated_input.get_sliced_mem(current_iteration_idx); - if (mem) { - concatenated_input.sliced_data_prim->set_output_memory(mem); - } else { - CLDNN_ERROR_MESSAGE(instance.id(), "sliced input memory of loop is not allocated properly"); - } + memory::ptr mem = concatenated_input->get_sliced_mem(current_iteration_idx); + OPENVINO_ASSERT(mem != nullptr, instance.id(), " sliced input memory of loop is not allocated properly"); + concatenated_input->sliced_data_prim->set_output_memory(mem); } - // Set backedges - for (const auto& backedge_memory_mapping : instance.backedge_memory_mappings) { - backedge_memory_mapping.setup_iteration(current_iteration_idx); + // Set backedges and output memory + for (auto& backedge_memory_mapping : backedge_memory_mappings) { + auto event_vec = handle_buffers_for_next_iteration(backedge_memory_mapping, body_network, current_iteration_idx, is_dynamic); + for (auto ev : event_vec) { + loop_carried_dep.push_back(ev); + } } - // Set sliced output memory - for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { - concat_output_mem_mapping.setup_sliced_output_memory(current_iteration_idx); + if (!is_dynamic) { + // Set sliced output memory for static shape model + // because body network generate output memory during the body network execution in dynamic model + for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { + concat_output_mem_mapping->setup_sliced_output_memory(current_iteration_idx); + } } // execute body network @@ -141,9 +316,10 @@ struct loop_impl : typed_primitive_impl { loop_carried_dep.clear(); for (const auto& backedge : _back_edges) { event::ptr body_event; - if (body_network->has_event(backedge.from)) + if (body_network->has_event(backedge.from)) { body_event = body_network->get_primitive_event(backedge.from); - loop_carried_dep.emplace_back(body_event); + loop_carried_dep.emplace_back(body_event); + } } // Collect output events for waiting for all iterations finishing @@ -155,42 +331,59 @@ struct loop_impl : typed_primitive_impl { } } - //TODO: execution_condition is prepared as they are presented in the - // ngraph opset document for loop operation. - // However they are not being used yet and only TensorIterator which - // has fixed sequence length is being validated. - if (!primitive->condition_id.empty()) { - execution_condition = loop_node::read_scalar_value(execution_condition_mem, stream); + // Store output of sliced_data_prim to sliced mems vector + // After execution of body network, sliced_data_prim will has output memory buffer + // current memory buffer move to sliced_mems and new memory buffer will be allocated in sliced_data_prim + if (is_dynamic) { + for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { + auto sliced_data_prim = concat_output_mem_mapping->sliced_data_prim; + auto output_mem_ptr = sliced_data_prim->output_memory_ptr(); + + auto sliced_id = sliced_data_prim->id(); + if (body_network->has_event(sliced_id)) { + auto ev = body_network->get_primitive_event(sliced_id); + if (ev) ev->wait(); + } + memory::ptr new_sliced_mem = concat_output_mem_mapping->get_or_create_sliced_mem(current_iteration_idx, + output_mem_ptr->get_layout()); + auto ev = new_sliced_mem->copy_from(body_network->get_stream(), *output_mem_ptr); + if (ev) { + loop_carried_dep.push_back(ev); + all_events.push_back(ev); + } + } } - // update index & execution condition for the next iteration - ++current_iteration_idx; + // execution condition is the result of body network execution + if (body_execution_condition_mem != nullptr) { + auto execution_id = primitive->body_execution_condition_id; + if (body_network->has_event(execution_id)) { + auto ev = body_network->get_primitive_event(execution_id); + if (ev) ev->wait(); + } + execution_condition = read_scalar_value(body_execution_condition_mem, body_network->get_stream()); + } + GPU_DEBUG_IF(!execution_condition) { + GPU_DEBUG_LOG << "body_exec_condition is false at "<< current_iteration_idx << " iterations" << std::endl; + } + + current_iteration_idx++; } // Reset network and wait for all collected events body_network->reset_execution(false); stream.wait_for_events(all_events); - // Concatenate sliced output to the outer network - for (size_t i = 0; i < concatenated_output_mem_mappings.size(); ++i) { - const auto& concat_output = concatenated_output_mem_mappings.at(i); - concat_output.restore_concatenated_mem(); - } + // Update actual num iteration + // update num_iterations (actual number of iterations) + memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr(); + write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx); + GPU_DEBUG_LOG << "current_iteration(" << primitive->num_iteration_id << ", " + << num_actual_iterations_mem << ") : " << current_iteration_idx << std::endl; - if (update_num_iterations) { - // update num_iterations (actual number of iterations) - int64_t actual_iterations = 0; - if (!primitive->current_iteration_id.empty()) { - const auto& backedge_mapping = instance.get_current_iteration_backedge_mapping(); - auto current_iteration_mem = backedge_mapping.from_primitive->output_memory_ptr(); - actual_iterations = loop_node::read_scalar_value(current_iteration_mem, stream); - } else { - actual_iterations = current_iteration_idx; - } - - memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr(); - loop_node::write_scalar_value(num_actual_iterations_mem, stream, actual_iterations); - } + if (is_dynamic) + instance.update_output_layout(); + instance.postprocess_output_memory(is_dynamic); ev->set(); return ev; @@ -202,23 +395,25 @@ struct loop_impl : typed_primitive_impl { void save(BinaryOutputBuffer& ob) const override { parent::save(ob); - ob << _max_iteration; ob << _back_edges; } void load(BinaryInputBuffer& ib) override { parent::load(ib); - ib >> _max_iteration; ib >> _back_edges; } private: - int64_t _max_iteration = 0; std::vector _back_edges; }; namespace detail { attach_loop_common::attach_loop_common() { + implementation_map::add(impl_types::common, + shape_types::dynamic_shape, + loop_impl::create, + {}, + {}); implementation_map::add(impl_types::common, loop_impl::create, {}); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp index 57c0f057455ba7..7f1e7abcb9b580 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/activation.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/type/element_type_traits.hpp" #include "register.hpp" #include "activation_inst.h" #include "implementation_map.hpp" @@ -108,7 +109,7 @@ struct activation_impl : public typed_primitive_impl { input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); // Most of the evaluate functions expect same data type for all inputs, so we need to convert params from float - typename data_type_to_type
::type param_a = static_cast::type>(additional_params.a); + auto param_a = static_cast::value_type>(additional_params.a); auto input_dt = instance.get_input_layout().data_type; diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 9e9cd8b1c93389..f15d143e28539c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -839,11 +839,11 @@ struct detection_output_impl : typed_primitive_impl { std::vector>>> scoreIndexPairs; if (instance.location_memory()->get_layout().data_type == data_types::f32) { - prepare_data::type>(stream, instance, bboxes, confidences, scoreIndexPairs); - generate_detections::type>(stream, instance, num_of_images, bboxes, confidences, scoreIndexPairs); + prepare_data::value_type>(stream, instance, bboxes, confidences, scoreIndexPairs); + generate_detections::value_type>(stream, instance, num_of_images, bboxes, confidences, scoreIndexPairs); } else { - prepare_data::type>(stream, instance, bboxes, confidences, scoreIndexPairs); - generate_detections::type>(stream, instance, num_of_images, bboxes, confidences, scoreIndexPairs); + prepare_data::value_type>(stream, instance, bboxes, confidences, scoreIndexPairs); + generate_detections::value_type>(stream, instance, num_of_images, bboxes, confidences, scoreIndexPairs); } ev->set(); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index 7afca0cb91c91f..cfb05c176c06ca 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -149,9 +149,9 @@ vector2D load_boxes(stream& stream, memory::ptr mem, bool center_p auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::f16: - return load_boxes_impl::type>(stream, mem, center_point); + return load_boxes_impl::value_type>(stream, mem, center_point); case cldnn::data_types::f32: - return load_boxes_impl::type>(stream, mem, center_point); + return load_boxes_impl::value_type>(stream, mem, center_point); default: throw std::runtime_error("Non max suppression - unsupported boxes data type"); } @@ -186,9 +186,9 @@ vector3D load_scores(stream& stream, memory::ptr mem) { auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::f16: - return load_scores_impl::type>(stream, mem); + return load_scores_impl::value_type>(stream, mem); case cldnn::data_types::f32: - return load_scores_impl::type>(stream, mem); + return load_scores_impl::value_type>(stream, mem); default: throw std::runtime_error("Non max suppression - unsupported scores data type"); } @@ -207,11 +207,11 @@ T load_scalar(stream& stream, memory::ptr mem) { auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - return load_scalar_impl::type>(stream, mem); + return load_scalar_impl::value_type>(stream, mem); case cldnn::data_types::f16: - return load_scalar_impl::type>(stream, mem); + return load_scalar_impl::value_type>(stream, mem); case cldnn::data_types::f32: - return load_scalar_impl::type>(stream, mem); + return load_scalar_impl::value_type>(stream, mem); default: throw std::runtime_error("Non max suppression - unsupported data type"); } @@ -244,13 +244,13 @@ void store_result(stream& stream, memory::ptr mem, const std::vectorget_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - store_result_impl::type>(stream, mem, result); + store_result_impl::value_type>(stream, mem, result); break; case cldnn::data_types::f16: - store_result_impl::type>(stream, mem, result); + store_result_impl::value_type>(stream, mem, result); break; case cldnn::data_types::f32: - store_result_impl::type>(stream, mem, result); + store_result_impl::value_type>(stream, mem, result); break; default: throw std::runtime_error("Non max suppression - unsupported output data type"); @@ -261,10 +261,10 @@ void store_first_output(stream& stream, memory::ptr mem, const std::vectorget_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - store_result_impl::type>(stream, mem, result); + store_result_impl::value_type>(stream, mem, result); break; case cldnn::data_types::i64: - store_result_impl::type>(stream, mem, result); + store_result_impl::value_type>(stream, mem, result); break; default: throw std::runtime_error("Non max suppression - unsupported output data type"); @@ -298,10 +298,10 @@ void store_second_output(stream& stream, memory::ptr mem, const std::vectorget_layout().data_type; switch (data_type) { case cldnn::data_types::f16: - store_second_output_impl::type>(stream, mem, result); + store_second_output_impl::value_type>(stream, mem, result); break; case cldnn::data_types::f32: - store_second_output_impl::type>(stream, mem, result); + store_second_output_impl::value_type>(stream, mem, result); break; default: throw std::runtime_error("Non max suppression - unsupported second output data type"); @@ -319,10 +319,10 @@ void store_third_output(stream& stream, memory::ptr mem, const std::vectorget_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - store_third_output_impl::type>(stream, mem, result); + store_third_output_impl::value_type>(stream, mem, result); break; case cldnn::data_types::i64: - store_third_output_impl::type>(stream, mem, result); + store_third_output_impl::value_type>(stream, mem, result); break; default: throw std::runtime_error("Non max suppression - unsupported third output data type"); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp index 461035c1defd75..2670949f8e9284 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/proposal.cpp @@ -396,9 +396,9 @@ struct proposal_impl : typed_primitive_impl { auto ev = instance.get_network().get_stream().create_user_event(false); im_info_t im_info; if (instance.dep_memory(proposal_inst::image_info_index).get_layout().data_type == data_types::f16) { - read_image_info::type>(stream, instance, im_info); + read_image_info::value_type>(stream, instance, im_info); } else { - read_image_info::type>(stream, instance, im_info); + read_image_info::value_type>(stream, instance, im_info); } if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type != @@ -408,26 +408,26 @@ struct proposal_impl : typed_primitive_impl { if (instance.dependencies().size() == 4) { auto proposal_probabilities = instance.dep_memory_ptr(proposal_inst::proposal_probabilities_out); if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) { - mem_lock::type, mem_lock_type::read> proposal_prob_ptr{proposal_probabilities, stream}; - execute::type>(stream, instance, im_info, proposal_prob_ptr.data()); + mem_lock::value_type, mem_lock_type::read> proposal_prob_ptr{proposal_probabilities, stream}; + execute::value_type>(stream, instance, im_info, proposal_prob_ptr.data()); } else { - mem_lock::type, mem_lock_type::read> proposal_prob_ptr{proposal_probabilities, stream}; - execute::type>(stream, instance, im_info, proposal_prob_ptr.data()); + mem_lock::value_type, mem_lock_type::read> proposal_prob_ptr{proposal_probabilities, stream}; + execute::value_type>(stream, instance, im_info, proposal_prob_ptr.data()); } } else if (instance.outputs_memory_count() == 2) { auto proposal_probabilities = instance.output_memory_ptr(1); if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) { - mem_lock::type, mem_lock_type::write> proposal_prob_ptr{proposal_probabilities, stream}; - execute::type>(stream, instance, im_info, proposal_prob_ptr.data()); + mem_lock::value_type, mem_lock_type::write> proposal_prob_ptr{proposal_probabilities, stream}; + execute::value_type>(stream, instance, im_info, proposal_prob_ptr.data()); } else { - mem_lock::type, mem_lock_type::write> proposal_prob_ptr{proposal_probabilities, stream}; - execute::type>(stream, instance, im_info, proposal_prob_ptr.data()); + mem_lock::value_type, mem_lock_type::write> proposal_prob_ptr{proposal_probabilities, stream}; + execute::value_type>(stream, instance, im_info, proposal_prob_ptr.data()); } } else { if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) { - execute::type>(stream, instance, im_info); + execute::value_type>(stream, instance, im_info); } else { - execute::type>(stream, instance, im_info); + execute::value_type>(stream, instance, im_info); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 43ce081d2f69ea..19007a481579f6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -110,20 +110,13 @@ struct fully_connected_impl : typed_primitive_impl_ocl { bool has_scale = !primitive->decompression_scale.empty(); size_t offset = primitive->bias.empty() ? 2 : 3; - const auto& weights_pshape = input1_layout.get_partial_shape(); if (has_scale) { auto scale_layout = input_layouts[offset++]; - if (input1_pshape.size() != 2) { - scale_layout.set_partial_shape(reshape_to_2d(scale_layout.get_partial_shape(), weights_pshape[0], primitive->weights_rank)); - } layouts.push_back(scale_layout); } if (has_zp) { auto zp_layout = input_layouts[offset]; - if (input1_pshape.size() != 2) { - zp_layout.set_partial_shape(reshape_to_2d(zp_layout.get_partial_shape(), weights_pshape[0], primitive->weights_rank)); - } layouts.push_back(zp_layout); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/group_normalization.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/group_normalization.cpp new file mode 100644 index 00000000000000..5296c1dda7e7aa --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/group_normalization.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "primitive_base.hpp" +#include "group_normalization_inst.h" +#include "group_normalization/group_normalization_kernel_ref.h" +#include "group_normalization/group_normalization_kernel_selector.h" + +namespace cldnn { +namespace ocl { + +struct group_normalization_impl : typed_primitive_impl_ocl { + using parent = typed_primitive_impl_ocl; + using parent::parent; + using kernel_selector_t = kernel_selector::group_normalization_kernel_selector; + using kernel_params_t = std::pair; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::group_normalization_impl) + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { + const auto& primitive = impl_param.typed_desc(); + auto params = get_default_params(impl_param, is_shape_agnostic); + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(2))); + auto optional_params = get_default_optional_params(impl_param.get_program()); + params.num_groups = primitive->num_groups; + params.epsilon = primitive->epsilon; + return {params, optional_params}; + } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + } +}; + +namespace detail { + +attach_group_normalization_impl::attach_group_normalization_impl() { + auto types = {data_types::f16, data_types::f32}; + auto formats = { + format::bfyx, + format::byxf, + format::yxfb, + format::bfzyx, + format::b_fs_yx_fsv2, + format::b_fs_zyx_fsv2, + format::b_fs_yx_fsv4, + format::b_fs_zyx_fsv4, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, + format::b_fs_zyx_fsv16, + format::b_fs_zyx_fsv32, + }; + + implementation_map::add(impl_types::ocl, shape_types::static_shape, + typed_primitive_impl_ocl::create, + types, + formats); +} + +} // namespace detail +} // namespace ocl +} // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::group_normalization_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::group_normalization) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 3392a7e42b2363..f0872d3702970e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -422,6 +422,8 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) { return kernel_selector::weights_layout::yxio; case format::os_yxi_osv16: return kernel_selector::weights_layout::os_yxi_osv16; + case format::o_is_yx_isv2: + return kernel_selector::weights_layout::o_is_yx_isv2; case format::o_is_yx_isv4: return kernel_selector::weights_layout::o_is_yx_isv4; case format::o_is_yx_isv16: @@ -741,6 +743,8 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { return cldnn::format::yxio; case kernel_selector::weights_layout::os_yxi_osv16: return cldnn::format::os_yxi_osv16; + case kernel_selector::weights_layout::o_is_yx_isv2: + return cldnn::format::o_is_yx_isv2; case kernel_selector::weights_layout::o_is_yx_isv4: return cldnn::format::o_is_yx_isv4; case kernel_selector::weights_layout::o_is_yx_isv16: diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 3a287bdeda4f7d..6b35b9cdfb16ce 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -40,6 +40,7 @@ void register_implementations() { REGISTER_OCL(gemm); REGISTER_OCL(generate_proposals); REGISTER_OCL(grid_sample); + REGISTER_OCL(group_normalization); REGISTER_OCL(lrn); REGISTER_OCL(lstm_gemm); REGISTER_OCL(lstm_elt); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index a5fb5a5817e395..45f4018bf90dac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -35,6 +35,7 @@ #include "intel_gpu/primitives/gemm.hpp" #include "intel_gpu/primitives/grid_sample.hpp" #include "intel_gpu/primitives/grn.hpp" +#include "intel_gpu/primitives/group_normalization.hpp" #include "intel_gpu/primitives/lrn.hpp" #include "intel_gpu/primitives/lstm.hpp" #include "intel_gpu/primitives/lstm_dynamic.hpp" @@ -120,6 +121,7 @@ REGISTER_OCL(gather_elements); REGISTER_OCL(gemm); REGISTER_OCL(generate_proposals); REGISTER_OCL(grid_sample); +REGISTER_OCL(group_normalization); REGISTER_OCL(lrn); REGISTER_OCL(lstm_gemm); REGISTER_OCL(lstm_elt); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index 075929afa765fb..aa11884b2445bc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -121,9 +121,9 @@ struct convolution_onednn : typed_primitive_onednn_impl { } if (a_zp_dtype == data_types::i8) { - set_activation_zero_points_attr::type>(attrs, a_zp.as(), zero_point_mask); + set_activation_zero_points_attr::value_type>(attrs, a_zp.as(), zero_point_mask); } else { // if (a_zp_dtype == data_types::u8) - set_activation_zero_points_attr::type>(attrs, a_zp.as(), zero_point_mask); + set_activation_zero_points_attr::value_type>(attrs, a_zp.as(), zero_point_mask); } } diff --git a/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h b/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h new file mode 100644 index 00000000000000..27fe382146999b --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h @@ -0,0 +1,39 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "intel_gpu/primitives/group_normalization.hpp" +#include "primitive_inst.h" + +namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + std::vector get_shape_infer_dependencies() const override { return {}; } +}; +using group_normalization_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(group_normalization_node const& /*node*/, const kernel_impl_params& impl_param) { + return forward_input0_shape(impl_param); + } + + static layout calc_output_layout(group_normalization_node const& node, kernel_impl_params const& impl_param); + static std::string to_string(group_normalization_node const& node); + + typed_primitive_inst(network& network, group_normalization_node const& desc); +}; + +using group_normalization_inst = typed_primitive_inst; + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h index fd048838c704ea..e9ed4ec959ae1c 100644 --- a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h +++ b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h @@ -169,6 +169,7 @@ class layout_optimizer { impl_types get_preferred_impl_type(program_node& node, format preferred_format); impl_types get_forced_impl_type_by_config(program_node& node); + static bool is_node_suitable_for_onednn(program_node& node); static bool are_data_types_suitable_for_onednn(program_node& node); bool are_layouts_suitable_for_onednn(program_node& node); static bool onednn_check_data_types_for_pooling(data_types in_dt, data_types out_dt); @@ -188,6 +189,7 @@ class layout_optimizer { optimization_attributes get_optimization_attributes() { return _optimization_attributes; } void set_implementation_forcing(const ov::intel_gpu::ImplForcingMap& map); + const std::map> get_implementation_forcing() const; void update_formats_map(const convolution_node& node); bool is_format_optimized(const convolution_node& node, const format& format, bool use_weak_restrictions = false); diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index 5d7dd710892181..22f4489ae507b5 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -21,163 +21,52 @@ template<> struct typed_program_node : public typed_program_node_base { private: using parent = typed_program_node_base; - mutable topology body; - std::vector input_primitive_maps; - std::vector output_primitive_maps; - mutable std::vector back_edges; - bool use_current_iteration; - bool use_execution_condition; - mutable program::ptr body_program; + std::vector& input_primitive_maps; + std::vector& output_primitive_maps; + std::vector& back_edges; public: - typed_program_node(std::shared_ptr prim, program& prog) : + typed_program_node(std::shared_ptr prim, program& prog) : parent(prim, prog), - body(this->get_primitive()->body), - input_primitive_maps(this->get_primitive()->input_primitive_maps), - output_primitive_maps(this->get_primitive()->output_primitive_maps), - back_edges(this->get_primitive()->back_edges), - use_current_iteration(!this->get_primitive()->current_iteration_id.empty()), - use_execution_condition(!this->get_primitive()->condition_id.empty()), - iteration_axis(0), - max_iteration(this->get_primitive()->max_iteration < 0 ? DEFAULT_MAX_NUM_ITERATION : this->get_primitive()->max_iteration) {} - - mutable size_t iteration_axis; - int64_t max_iteration; - - int64_t get_max_iteration() const { return max_iteration; } - program::ptr get_body_program() const { return body_program; } - bool is_current_iteration_used() const { return use_current_iteration; } - bool is_execution_condition_used() const { return use_execution_condition; } - - static size_t convert_to_raw_axis(size_t axis, size_t ndim) { - // convert between bfyx, bfzyx, bfzyxw and tensor.size.raw - if (axis >= ndim) { - throw std::runtime_error("axis should be less than ndim"); - } - - if (axis < 2) { - return axis; - } - return (ndim - 1) - (axis - 2); - } - - // read scala value from data primitive - static int64_t read_scalar_value(memory::ptr mem, stream& stream) { - int64_t trip_count = 0; - const layout& prim_layout = mem->get_layout(); - - switch (prim_layout.data_type) { - case data_types::u8: { - mem_lock lock_prim_output{mem, stream}; - trip_count = *lock_prim_output.data(); - break; - } - case data_types::i8: { - mem_lock lock_prim_output{mem, stream}; - trip_count = *lock_prim_output.data(); - break; - } - case data_types::i32: { - mem_lock lock_prim_output{mem, stream}; - trip_count = *lock_prim_output.data(); - break; - } - case data_types::i64: { - mem_lock lock_prim_output{mem, stream}; - trip_count = *lock_prim_output.data(); - break; - } - default: - throw std::runtime_error("Invalid data type : " + ov::element::Type(prim_layout.data_type).get_type_name()); - } - return trip_count; - } + input_primitive_maps(prim->input_primitive_maps), + output_primitive_maps(prim->output_primitive_maps), + back_edges(prim->back_edges) {} - template - static inline void validate_input_value(int64_t input) { - if (input < std::numeric_limits::min() || input > std::numeric_limits::max()) { - throw std::runtime_error("Invalid data value : " + std::to_string(input)); - } - } + program::ptr get_body_program() const { return get_primitive()->body_program; } - static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) { - const layout& prim_layout = mem->get_layout(); - - switch (prim_layout.data_type) { - case data_types::u8: { - validate_input_value(input); - mem_lock lock_prim_output{mem, stream}; - lock_prim_output[0] = static_cast(input); - break; - } - case data_types::i8: { - validate_input_value(input); - mem_lock lock_prim_output{mem, stream}; - lock_prim_output[0] = static_cast(input); - break; - } - case data_types::i32: { - validate_input_value(input); - mem_lock lock_prim_output{mem, stream}; - lock_prim_output[0] = static_cast(input); - break; - } - case data_types::i64: { - mem_lock lock_prim_output{mem, stream}; - lock_prim_output[0] = input; - break; - } - default: - throw std::runtime_error("Invalid data type : " + ov::element::Type(prim_layout.data_type).get_type_name()); - } - } - - layout calc_body_input_layout(const loop::io_primitive_map& inputDesc) const { - const auto& dependency_list = this->get_dependencies(); - auto input = std::find_if(dependency_list.begin(), dependency_list.end(), [&inputDesc](const std::pair& dep){ - return dep.first->id() == inputDesc.external_id; - }); - if (input == dependency_list.end()) { - throw std::runtime_error("Can't find input from dependency_list"); - } - layout calculated_layout = (*input).first->get_output_layout(); - auto shape = calculated_layout.get_tensor().sizes(calculated_layout.format); - - if (inputDesc.axis >= 0) { - iteration_axis = convert_to_raw_axis(static_cast(inputDesc.axis), shape.size()); - auto calculated_size = calculated_layout.get_tensor(); - calculated_size.raw[iteration_axis] = 1; // cropped inputs shape - calculated_layout.set_tensor(calculated_size); - } - - return calculated_layout; - } + const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; } + const primitive_id& get_initial_execution_id() const { return get_primitive()->first_execution_condition_id; } + const primitive_id& get_current_iteration_id() const { return get_primitive()->body_current_iteration_id; } + const primitive_id& get_execution_condition_id() const { return get_primitive()->body_execution_condition_id; } + const primitive_id& get_num_iterations_id() const { return get_primitive()->num_iteration_id; } + const int32_t get_max_num_iteration() const { return get_primitive()->max_num_iterations; } const std::vector& get_input_primitive_maps() const { return input_primitive_maps; } const std::vector& get_output_primitive_maps() const { return output_primitive_maps; } + const std::vector& get_back_edges() const { return back_edges;} void update_primitive_map(const primitive_id& prevID, const primitive_id& newID, bool external_id = true) { if (external_id) { for (auto& pm : input_primitive_maps) { - if (pm.external_id == prevID) { - pm.external_id = newID; + if (pm.external_id.pid == prevID) { + pm.external_id.pid = newID; } } for (auto& pm : output_primitive_maps) { - if (pm.external_id == prevID) { - pm.external_id = newID; + if (pm.external_id.pid == prevID) { + pm.external_id.pid = newID; } } } else { for (auto& pm : input_primitive_maps) { - if (pm.internal_id == prevID) { - pm.internal_id = newID; + if (pm.internal_id.pid == prevID) { + pm.internal_id.pid = newID; } } for (auto& pm : output_primitive_maps) { - if (pm.internal_id == prevID) { - pm.internal_id = newID; + if (pm.internal_id.pid == prevID) { + pm.internal_id.pid = newID; } } for (auto& back_edge : back_edges) { @@ -191,157 +80,266 @@ struct typed_program_node : public typed_program_node_base { } } - const std::vector& get_back_edges() const { return back_edges;} - - static bool is_integer(const data_types& data_type) { - switch (data_type) { - case data_types::u8: - case data_types::i8: - case data_types::i32: - case data_types::i64: - return true; - default: - return false; - } + // current_iteration is necessary to calculate output layout in dynamic shape + std::vector get_shape_infer_dependencies() const override { return {0}; } + + using parent::get_kernel_impl_params; + std::unique_ptr get_kernel_impl_params(const std::vector& in_layouts, const std::vector& out_layouts) const override { + auto params = parent::get_kernel_impl_params(in_layouts, out_layouts); + params->inner_progs = { get_primitive()->body_program }; + // Set memory_deps using custom get_memory_deps to add current_iteration(mutable_data) into memory_deps + params->memory_deps = get_memory_deps(); + return params; } - void process_current_iteration() const { - const primitive_id& current_iteration_id = get_current_iteration_id(); - if (current_iteration_id.empty()) { - return; - } +private: + std::map get_memory_deps() const; +}; - const topology_map& body_topology_map = body.get_primitives(); - const layout body_input_layout(data_types::i64, format::bfyx, {1, 1, 1, 1}); +using loop_node = typed_program_node; - // add current_iteration primitive if current_iteration primitive is not exist in body - if (body_topology_map.find(current_iteration_id) == body_topology_map.end()) { - body.add_primitive(std::make_shared(current_iteration_id, body_input_layout)); - } else { - const auto& body_input_prim = body.at(current_iteration_id); - const auto input_layout_prim = std::dynamic_pointer_cast(body_input_prim); - OPENVINO_ASSERT(input_layout_prim, "[GPU] current_iteration primitive should be cldnn::input_layout in node", this->id()); - input_layout_prim->change_layout(body_input_layout); +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + struct concatenated_memory_mapping { + using ptr = std::shared_ptr; + using cptr = std::shared_ptr; + concatenated_memory_mapping(int64_t axis, + memory::ptr concatenated_mem, + std::vector sliced_mems, // To change shared ptr vector + stream& stream, + engine& engine, + int64_t iteration_elements = 0, + int64_t stride = 0, + int64_t initial_offset = 0) : + axis(axis), + concatenated_mem(concatenated_mem), + sliced_mems(sliced_mems), + stream(stream), + engine(engine), + iteration_elements(iteration_elements), + stride(stride), + initial_offset(initial_offset) { + calculate_concatenated_mem(); + } + + concatenated_memory_mapping(const concatenated_memory_mapping& o) : + axis(o.axis), + concat_data_prim(o.concat_data_prim), + sliced_data_prim(o.sliced_data_prim), + + concatenated_mem(o.concatenated_mem), + sliced_mems(o.sliced_mems), + stream(o.stream), + engine(o.engine), + iteration_elements(o.iteration_elements), + stride(o.stride), + initial_offset(o.initial_offset), + + bytes_per_element(o.bytes_per_element), + batch_size(o.batch_size), + bytes_batch_stride(o.bytes_batch_stride), + bytes_iteration(o.bytes_iteration), + bytes_iteration_stride(o.bytes_iteration_stride), + bytes_iteration_initial_offset(o.bytes_iteration_initial_offset) {} + + + static int64_t get_batch_size(layout mem_layout, int64_t axis) { + if (axis < 0) { + throw std::runtime_error("axis should be positive integer or zero"); + } + + if (mem_layout.is_dynamic()) { + return -1; + } + + int64_t batch_size = 1; + for (int64_t i = 0; i < axis; ++i) { + batch_size *= mem_layout.get_tensor().raw[i]; + } + for (int64_t i = axis-1; i >= 2; --i) { + batch_size *= mem_layout.get_tensor().raw[i]; + } + return batch_size; } - // add incremental data: 1 - // it is used to update current_iteration in body network - const primitive_id increment_value_id = current_iteration_id + "_inc"; - auto mem = get_program().get_engine().allocate_memory(body_input_layout); - auto& stream = get_program().get_stream(); - write_scalar_value(mem, stream, 1); - body.add_primitive(std::make_shared(increment_value_id, mem)); - - // add eltwise sum updating current_iteration with incremental data - const primitive_id updated_currnet_iteration_id = current_iteration_id + "_update"; - body.add_primitive(std::make_shared(updated_currnet_iteration_id, - current_iteration_id, increment_value_id, eltwise_mode::sum)); - - // set backedge - back_edges.emplace_back(updated_currnet_iteration_id, current_iteration_id); - } + void calculate_concatenated_mem() const { + if (!sliced_mems.empty() && concatenated_mem != nullptr) { + auto& sliced_layout = sliced_mems.front()->get_layout(); + const int64_t num_elements_batch = get_batch_size(sliced_layout, axis); + iteration_elements = sliced_layout.count() / num_elements_batch; + bytes_per_element = data_type_traits::size_of(concatenated_mem->get_layout().data_type); + batch_size = get_batch_size(concatenated_mem->get_layout(), axis); + bytes_batch_stride = (static_cast(concatenated_mem->get_layout().count()) / batch_size) * bytes_per_element; + bytes_iteration = iteration_elements * bytes_per_element; + bytes_iteration_stride = stride * bytes_iteration; + bytes_iteration_initial_offset = initial_offset * bytes_iteration; + } + } - void process_single_int_output(const primitive_id& id) const { - // add mutable if not exist - const topology_map& body_topology_map = body.get_primitives(); - layout body_output_layout(data_types::i64, format::bfyx, {1, 1, 1, 1}); - if (!id.empty()) { - auto body_output = body_topology_map.find(id); - if (body_output == body_topology_map.end()) { - auto mem = get_program().get_engine().allocate_memory(body_output_layout); - auto md = std::make_shared(id, mem); - body.add_primitive(md); + void update_concatenated_mem(memory::ptr mem) { + if (concatenated_mem != nullptr && concatenated_mem->get_layout() == mem->get_layout()) { + concatenated_mem = mem; } else { - auto body_output_prim = body.at(body_output->first); - auto mem = get_program().get_engine().allocate_memory(body_output_layout); - body_output_prim.reset(new mutable_data(body_output->first, std::move(mem))); + concatenated_mem = mem; + calculate_concatenated_mem(); } } - } - void build_body_program() const { - for (const auto& pm : input_primitive_maps) { - layout calculated_layout = calc_body_input_layout(pm); - const primitive_id& internal_input_id = pm.internal_id; + void restore_concatenated_mem() const { + OPENVINO_ASSERT(concatenated_mem != nullptr, "concatenated_mem should not be nullptr"); + mem_lock concat_mem_lock{ concatenated_mem, stream }; + int64_t iteration_offset = bytes_iteration_initial_offset; + for (const auto& sliced_mem : sliced_mems) { + // To support multi-batch, just repeat memcpy for each batch + for (int64_t batch = 0; batch < batch_size; ++batch) { + const int64_t src_offset = batch * bytes_iteration; + const int64_t dst_offset = batch * bytes_batch_stride + iteration_offset; + mem_lock sliced_mem_lock{ sliced_mem, stream }; + uint8_t* src = sliced_mem_lock.data() + src_offset; + uint8_t* dst = concat_mem_lock.data() + dst_offset; + std::copy(src, src + bytes_iteration, dst); + } + iteration_offset += bytes_iteration_stride; + } + } - // add inputs for body network if not exist - if (body.get_primitives().count(internal_input_id) == 0) { - body.add_primitive(std::make_shared(internal_input_id, calculated_layout)); - } else { - body.change_input_layout(internal_input_id, calculated_layout); + // Get sliced mem for the iteration idx and copy data from external input to sliced mem + // In the case of dynamic model, concatenated_mem is always non nullptr. + memory::ptr get_sliced_mem(int64_t iteration) const { + OPENVINO_ASSERT(!sliced_mems.empty(), "For input data, sliced_mems should not be empty"); + mem_lock from_lock{ concatenated_mem, stream }; + int64_t batch_offset = 0; + auto sliced_mem = get_or_create_sliced_mem(iteration, sliced_mems.front()->get_layout()); + const int64_t iteration_offset = bytes_iteration_initial_offset + + bytes_iteration_stride * iteration; + // To support multi-batch, just repeat memcpy for each batch + for (int64_t batch = 0; batch < batch_size; ++batch) { + const int64_t src_offset = batch_offset + iteration_offset; + const int64_t dst_offset = batch * bytes_iteration; + mem_lock to_lock{ sliced_mem, stream }; + const auto src = from_lock.begin() + src_offset; + const auto dst = to_lock.begin() + dst_offset; + std::copy(src, src + bytes_iteration, dst); + batch_offset += bytes_batch_stride; } + return sliced_mem; } - // setup internal output - OPENVINO_ASSERT(!output_primitive_maps.empty(), "[GPU] Output primitive map should have at least 1 mapping in primitive ", this->id()); - std::set output_names; - output_names.insert(output_primitive_maps.front().internal_id); - - // add current_iteration_id in body network, condition_id if exist - process_current_iteration(); - process_single_int_output(get_condition_id()); - - // setup outputs for backedges - for (auto& back_edge : back_edges) { - // check whether the back_edge.to has its corresponding io_primitive_map - const auto& input_map = std::find_if(input_primitive_maps.begin(), input_primitive_maps.end(), - [&](const loop::io_primitive_map& pm) { - return pm.internal_id == back_edge.to; - }); - - // backedge which is current_iteration does not have - // input primitive map because its initial value is always - // zero and the value will be set in execute_impl() - if (back_edge.to != get_current_iteration_id() && input_map == input_primitive_maps.end()) { - std::string msg = "[GPU] No primitive mapping for backedge (internal_id: " + back_edge.to + ") for primitive " + this->id(); - OPENVINO_ASSERT(false, msg.c_str()); + memory::ptr get_or_create_sliced_mem(int64_t idx, const layout& mem_layout) const { + bool recalc_data = !sliced_mems.empty(); + while (sliced_mems.size() <= static_cast(idx)) { + memory::ptr sliced_mem = engine.allocate_memory(mem_layout, 0); + sliced_mems.push_back(sliced_mem); } + if (recalc_data) { + calculate_concatenated_mem(); + } + return sliced_mems.at(idx); + } - output_names.insert(back_edge.from); + void setup_sliced_output_memory(uint64_t iteration) const { + if (sliced_data_prim) { + OPENVINO_ASSERT(iteration < sliced_mems.size(), "invalid index"); + const auto& sliced_output_mem = sliced_mems.at(iteration); + sliced_data_prim->set_output_memory(sliced_output_mem); + } } - // if execution_condition_id is specified, we need to add the id in build_option::outputs - if (!get_condition_id().empty()) { - output_names.insert(get_condition_id()); + std::vector& get_sliced_mems() const { return sliced_mems; } + + void reset_data_for_shape_changed() { + bytes_per_element = 0; + batch_size = 0; + bytes_batch_stride = 0; + bytes_iteration = 0; + bytes_iteration_stride = 0; + bytes_iteration_initial_offset = 0; + if (concatenated_mem) concatenated_mem = nullptr; + iteration_elements = 0; + sliced_mems.clear(); } - std::vector output_names_vec(output_names.begin(), output_names.end()); - auto config = get_program().get_config(); - config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); - body_program = program::build_program(get_program().get_engine(), body, config, get_program().get_task_executor(), false, false, true); - } + std::string to_string() const { + std::stringstream ss; + ss << "concatenated_memory_mapping [" << std::endl; + ss << "* axis : " << axis << std::endl; + ss << "* bytes_per_element : " << bytes_per_element << std::endl; + ss << "* batch_size : " << batch_size << std::endl; + if (concatenated_mem != nullptr && concatenated_mem->get_layout().is_static()) { + ss << "* bytes_batch_stride : " << bytes_batch_stride << " = (static_cast(" + << concatenated_mem->get_layout().count() << ") / batch_size:" << batch_size << ") * bytes_per_element:" << bytes_per_element << std::endl; + } else { + ss << "* bytes_batch_stride : " << bytes_batch_stride << std::endl; + } + ss << "* bytes_iteration : " << bytes_iteration << " = (iteration_elements:" + << iteration_elements << " * bytes_per_element:" << bytes_per_element << ")" << std::endl; + ss << "* bytes_iteration_stride : " << bytes_iteration_stride << std::endl; + ss << "* bytes_iteration_initial_offset : " << bytes_iteration_initial_offset << std::endl; + ss << "* concat_data_prim : " << ((concat_data_prim != nullptr)? concat_data_prim->id() : "nullptr") << std::endl; + ss << "* sliced_data_prim : " << ((sliced_data_prim != nullptr)? sliced_data_prim->id() : "nullptr") << std::endl; + if (concatenated_mem) { + ss << "* concatenated_mem : " << concatenated_mem->get_layout().to_short_string() << std::endl; + } else { + ss << "* concatenated_mem : nullptr" << std::endl; + } + ss << "* iteration_elements : " << iteration_elements << std::endl; + ss << "* stride : " << stride << std::endl; + ss << "* initial_offset : " << initial_offset << std::endl; + ss << "* sliced_mems :{ "; + for (auto mem : sliced_mems) { + ss << mem->get_layout().to_short_string() << ","; + } + ss << "}]" << std::endl; + return ss.str(); + } - const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; } - const primitive_id& get_initial_execution_id() const { return get_primitive()->initial_execution_id; } - const primitive_id& get_current_iteration_id() const { return get_primitive()->current_iteration_id; } - const primitive_id& get_condition_id() const { return get_primitive()->condition_id; } - const primitive_id& get_num_iteration_id() const { return get_primitive()->num_iteration_id; } - const topology& get_body_topology() const { return get_primitive()->body; } -}; + const int64_t axis; + std::shared_ptr concat_data_prim; + std::shared_ptr sliced_data_prim; -using loop_node = typed_program_node; +private: + mutable memory::ptr concatenated_mem; + mutable std::vector sliced_mems; + cldnn::stream& stream; + cldnn::engine& engine; + mutable int64_t iteration_elements = 0; + const int64_t stride = 0; + const int64_t initial_offset = 0; -template <> -class typed_primitive_inst : public typed_primitive_inst_base { - using parent = typed_primitive_inst_base; - using parent::parent; + // element size + mutable int64_t bytes_per_element; + // number of higher level of dimension of slicing axis + mutable int64_t batch_size; + // stride of batch in concatenated memory + mutable int64_t bytes_batch_stride; + // byte size of each iteration per batch in a sliced memory + mutable int64_t bytes_iteration; + // byte size of each iteration (bytes_iteration * batch_size) in a sliced memory + mutable int64_t bytes_iteration_stride; + // byte offset of 1st iteration in a batch in a sliced memory + mutable int64_t bytes_iteration_initial_offset; + }; -public: struct backedge_memory_mapping { enum backedge_type { // output memory(from_primitive) of body network needs to be concatenated CONCAT_OUTPUT, - // output memory(from_primitive) of body network does not need to be concateneated + // output memory(from_primitive) of body network does not need to be concatenated // input memory is shared by output memory SINGLE_SHARED, - // output memory(from_primitive) of body network does not need to be concateneated - // input memory is not shared by output memroy + // output memory(from_primitive) of body network does not need to be concatenated + // input memory is not shared by output memory // each iteration input memory and output memory are swapped SINGLE, }; std::shared_ptr from_primitive; std::shared_ptr to_primitive; - std::vector from_mems; + std::shared_ptr concat_mem_mapping; + mutable memory::ptr from_mem; memory::ptr initial_mem; cldnn::stream& stream; backedge_type type; @@ -349,10 +347,11 @@ class typed_primitive_inst : public typed_primitive_inst_base { backedge_memory_mapping( std::shared_ptr _from_primitive, std::shared_ptr _to_primitive, - std::vector _from_mems, memory::ptr _initial_mem, cldnn::stream& _stream, backedge_type _type = CONCAT_OUTPUT): + std::shared_ptr _concat_mem_mapping, memory::ptr _initial_mem, + cldnn::stream& _stream, backedge_type _type = CONCAT_OUTPUT): from_primitive(_from_primitive), to_primitive(std::move(_to_primitive)), - from_mems(_from_mems), + concat_mem_mapping(std::move(_concat_mem_mapping)), initial_mem(std::move(_initial_mem)), stream(_stream), type(_type), @@ -365,7 +364,7 @@ class typed_primitive_inst : public typed_primitive_inst_base { memory::ptr _from_mem, memory::ptr _initial_mem, cldnn::stream& _stream, backedge_type _type = SINGLE_SHARED): from_primitive(_from_primitive), to_primitive(std::move(_to_primitive)), - from_mems{std::move(_from_mem)}, + from_mem{std::move(_from_mem)}, initial_mem(std::move(_initial_mem)), stream(_stream), type(_type), @@ -385,161 +384,67 @@ class typed_primitive_inst : public typed_primitive_inst_base { validate_backedge_memory(); } - void setup_iteration(int64_t iter) const { - if (type == CONCAT_OUTPUT) { - if (iter == 0) { - to_primitive->set_output_memory(initial_mem); - } else if (iter > 0) { - to_primitive->set_output_memory(from_mems.at(iter - 1)); - } else { - throw std::runtime_error("Invalid iteraton count" + std::to_string(iter)); - } - } else if (type == SINGLE_SHARED && iter == 0) { - from_mems.front()->copy_from(stream, *initial_mem); - } else if (type == SINGLE) { - memory::ptr mem1 = to_primitive->output_memory_ptr(); - if (iter == 0) { - mem1->copy_from(stream, *initial_mem); - } else { - memory::ptr mem2 = from_primitive->output_memory_ptr(); - to_primitive->set_output_memory(std::move(mem2)); - from_primitive->set_output_memory(mem1); - } - } - } - private: void validate_backedge_memory() { - for (const auto& from_mem : from_mems) { + if (from_mem) { const size_t from_mem_bytes = from_mem->get_layout().bytes_count(); - if (from_mem_bytes != total_bytes) { - throw std::runtime_error("Invalid backedge memory layout: " - "size not matched with that of initial_mem"); - } + OPENVINO_ASSERT((from_mem_bytes == total_bytes), "Invalid backedge memory layout: size(", + from_mem_bytes, ",", from_mem->get_layout().to_short_string(), + ") not matched with that of initial_mem(", total_bytes, + ",", initial_mem->get_layout().to_short_string(), ")"); } - } - }; - - struct concatenated_memory_mapping { - concatenated_memory_mapping(int64_t axis, - memory::ptr concatenated_mem, - std::vector sliced_mems, - stream& stream, - int64_t iteration_elements = 0, - int64_t stride = 0, - int64_t initial_offset = 0) : - axis(axis), - concatenated_mem(concatenated_mem), - sliced_mems(sliced_mems), - stream(stream), - bytes_per_element(data_type_traits::size_of(concatenated_mem->get_layout().data_type)), - batch_size(get_batch_size(concatenated_mem->get_layout(), axis)), - bytes_batch_stride((static_cast(concatenated_mem->get_layout().count()) / batch_size) * bytes_per_element), - bytes_iteration(iteration_elements * bytes_per_element), - bytes_iteration_stride(stride * bytes_iteration), - bytes_iteration_initial_offset(initial_offset * bytes_iteration) {} - - static int64_t get_batch_size(layout mem_layout, int64_t axis) { - if (axis < 0) { - throw std::runtime_error("axis should be positive integer or zero"); - } - - int64_t batch_size = 1; - for (int64_t i = 0; i < axis; ++i) { - batch_size *= mem_layout.get_tensor().raw[i]; - } - for (int64_t i = axis-1; i >= 2; --i) { - batch_size *= mem_layout.get_tensor().raw[i]; - } - return batch_size; - } - - void restore_concatenated_mem() const { - mem_lock concat_mem_lock{ concatenated_mem, stream }; - int64_t iteration_offset = bytes_iteration_initial_offset; - for (const auto& sliced_mem : sliced_mems) { - for (int64_t batch = 0; batch < batch_size; ++batch) { - const int64_t src_offset = batch * bytes_iteration; - const int64_t dst_offset = batch * bytes_batch_stride + iteration_offset; - mem_lock sliced_mem_lock{ sliced_mem, stream }; - uint8_t* src = sliced_mem_lock.data() + src_offset; - uint8_t* dst = concat_mem_lock.data() + dst_offset; - std::copy(src, src + bytes_iteration, dst); + if (concat_mem_mapping) { + for (const auto& from_mem : concat_mem_mapping->get_sliced_mems()) { + const size_t from_mem_bytes = from_mem->get_layout().bytes_count(); + OPENVINO_ASSERT((from_mem_bytes == total_bytes), "Invalid backedge memory layout: size(", + from_mem_bytes, ",", from_mem->get_layout().to_short_string(), + ") not matched with that of initial_mem(", total_bytes, + ",", initial_mem->get_layout().to_short_string(), ")"); } - iteration_offset += bytes_iteration_stride; } } - - void setup_sliced_output_memory(uint64_t iteration) const { - const auto& sliced_output_mem = sliced_mems.at(iteration); - sliced_data_prim->set_output_memory(sliced_output_mem); - } - - memory::ptr get_sliced_mem(int64_t iteration) const { - mem_lock from_lock{ concatenated_mem, stream }; - int64_t batch_offset = 0; - const int64_t iteration_offset = bytes_iteration_initial_offset + - bytes_iteration_stride * iteration; - for (int64_t batch = 0; batch < batch_size; ++batch) { - const int64_t src_offset = batch_offset + iteration_offset; - const int64_t dst_offset = batch * bytes_iteration; - mem_lock to_lock{ sliced_mems.at(iteration), stream }; - const auto src = from_lock.begin() + src_offset; - const auto dst = to_lock.begin() + dst_offset; - std::copy(src, src + bytes_iteration, dst); - batch_offset += bytes_batch_stride; - } - return sliced_mems.at(iteration); - } - - const int64_t axis; - std::shared_ptr concat_data_prim; - std::shared_ptr sliced_data_prim; - memory::ptr concatenated_mem; - std::vector sliced_mems; - cldnn::stream& stream; - // element size - const int64_t bytes_per_element; - // number of higher level of dimension of slicing axis - const int64_t batch_size; - // stride of batch in concatanated memory - const int64_t bytes_batch_stride; - // byte size of each iteration per batch in a sliced memory - const int64_t bytes_iteration; - // byte size of each iteration (bytes_iteration * batch_size) in a sliced memory - const int64_t bytes_iteration_stride; - // byte offset of 1st iteration in a batch in a sliced memory - const int64_t bytes_iteration_initial_offset; }; - static layout calc_output_layout(const loop_node& node, kernel_impl_params const& impl_param); + template + static std::vector calc_output_layouts(loop_node const& /*node*/, kernel_impl_params const& impl_param); + static layout calc_output_layout(const loop_node& /*node*/, kernel_impl_params const& impl_param); bool preproc_memories_done = false; std::vector backedge_memory_mappings; - std::vector concatenated_input_mem_mappings; - std::vector concatenated_output_mem_mappings; + std::vector concatenated_input_mem_mappings; + std::vector concatenated_output_mem_mappings; static std::string to_string(const loop_node& node); - size_t current_iteratoin_backedge_mapping_idx = 0; public: typed_primitive_inst(network& network, const loop_node& node); network::ptr get_body_network() const { return body_network; } - void preprocess_input_memory(); - void preprocess_output_memory(); + void preprocess_input_memory(const int64_t trip_count); + void preprocess_output_memory(const int64_t trip_count); void preprocess_backedge_memory(); void update_mapped_memory(); + void update_input_mapped_memory(); + void update_output_mapped_memory(); + void update_backedge_mapped_memory(); + void postprocess_output_memory(bool is_dynamic); + concatenated_memory_mapping::ptr create_concat_memory_map(const input_info& id, + const cldnn::loop::io_primitive_map& io_prim_map, + memory::ptr mem_ptr, + const int64_t trip_count); event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0) override; - const backedge_memory_mapping& get_current_iteration_backedge_mapping() const { - OPENVINO_ASSERT(node->is_current_iteration_used(), "[GPU] No backedge mapping for current_iteration for primitive ", node->id()); - return backedge_memory_mappings.at(current_iteratoin_backedge_mapping_idx); - } + void reset_memory(); + void save(BinaryOutputBuffer& ob) const override; void load(BinaryInputBuffer& ib) override; + void validate_backedges(loop_node const & node) const; + + void update_shape() override { primitive_inst::update_shape(); } + void update_output_layout(); private: network::ptr body_network; - memory::ptr get_external_memory(const primitive_id& external_id) const; - std::vector get_sliced_mem(const primitive_id& internal_id) const; + memory::ptr get_external_memory(const primitive_id& external_id, size_t mem_idx = 0) const; + layout get_external_output_layout(const primitive_id& external_id, size_t mem_idx = 0) const; + std::shared_ptr get_sliced_mem(const primitive_id& internal_id) const; std::vector _input_primitive_maps; std::vector _output_primitive_maps; std::vector _back_edges; @@ -547,9 +452,13 @@ class typed_primitive_inst : public typed_primitive_inst_base { primitive_id _initial_execution_id; primitive_id _current_iteration_id; primitive_id _condition_id; - primitive_id _num_iteration_id; - int64_t _max_iteration = 0; + primitive_id _num_iterations_id; }; using loop_inst = typed_primitive_inst; + +static inline std::ostream& operator<< (std::ostream& os, loop_inst::concatenated_memory_mapping& map) { + os << map.to_string(); + return os; +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index fabb1e53329293..69b1e12fa3b4ae 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -32,6 +32,7 @@ #include "region_yolo_inst.h" #include "prior_box_inst.h" #include "scatter_nd_update_inst.h" +#include "gather_inst.h" #include "to_string_utils.h" #include #include @@ -306,7 +307,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, (fmt_prev == format::b_fs_yx_fsv4 && prev_output_layout.feature() % 32 == 0 && prev_output_layout.spatial(0) == 1 && - prev_output_layout.spatial(1) == 1))) + prev_output_layout.spatial(1) == 1)) && is_input_reorder(prev, next)) return true; if (next.is_type() && fmt_prev == format::b_fs_yx_fsv16 && fmt_next == format::b_fs_yx_fsv4 && is_input_idx(0)) @@ -849,6 +850,18 @@ static bool is_node_for_onednn(reduce_node const& node, format preferred_format) return true; } +static bool is_node_for_onednn(convolution_node const& node) { + if (!layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node)) + return false; + + auto input_layout = node.get_input_layout(0); + auto output_layout = node.get_output_layout(0); + if (input_layout.is_dynamic() || output_layout.is_dynamic()) + return false; + + return true; +} + static bool is_node_for_onednn(deconvolution_node const& node) { auto prim = node.get_primitive(); auto input_layout = node.get_input_layout(0); @@ -871,6 +884,9 @@ static bool is_node_for_onednn(deconvolution_node const& node) { static bool is_node_for_onednn(fully_connected_node const& node) { + if (!layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node)) + return false; + auto fc_prim = node.get_primitive(); // onednn impl doesn't support compressed weights for now if (fc_prim->compressed_weights) @@ -891,6 +907,10 @@ static bool is_node_for_onednn(fully_connected_node const& node) { return true; } +static bool is_node_for_onednn(gemm_node const& node) { + return layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node); +} + // This function is needed to avoid performance regressions for the convolutions with byxf layout // Previously some topologies had scale operations which prevented byxf usage // Now instead of scale we have eltwise + fused_ops which might enable byxf convolution in unexpected cases @@ -1242,6 +1262,20 @@ format layout_optimizer::get_expected_format(quantize_node const& node) { return expected; } +bool layout_optimizer::is_node_suitable_for_onednn(program_node& node) { + if (node.is_type()) { + return is_node_for_onednn(node.as()); + } else if (node.is_type()) { + return is_node_for_onednn(node.as()); + } else if (node.is_type()) { + return is_node_for_onednn(node.as()); + } else if (node.is_type()) { + return is_node_for_onednn(node.as()); + } + + return false; +} + bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) { auto in_dt = node.get_input_layout(0).data_type; auto out_dt = node.get_output_layout(false).data_type; @@ -1770,6 +1804,10 @@ format layout_optimizer::get_preferred_format(program_node& node) { node.set_preferred_input_fmt(0, format::bfyx); } } + } else if (node.is_type()) { + // Gather needs the original input/output rank because + // the parameters as indices, batch_dims and axis depend on the rank. + node.set_preferred_input_fmt(0, format::get_default_format(node.as().get_primitive()->input_rank)); } if (allow_new_shape_infer && node.get_preferred_input_fmt() != format::any) { @@ -2089,6 +2127,10 @@ void layout_optimizer::set_implementation_forcing(const ov::intel_gpu::ImplForci } } +const std::map> layout_optimizer::get_implementation_forcing() const { + return _forcing_map; +} + const std::vector> layout_optimizer::optimized_formats = { {format::b_fs_yx_fsv16, true}, {format::b_fs_yx_fsv16, false}, diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index 4e33f14e507084..a51c2d0d85973e 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -3,6 +3,8 @@ // #include "loop_inst.h" +#include "data_inst.h" +#include "mutable_data_inst.h" #include "json_object.h" #include "primitive_type_base.h" #include "intel_gpu/primitives/data.hpp" @@ -15,6 +17,41 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(loop) +std::map loop_node::get_memory_deps() const { + auto memory_deps = get_const_memory_deps(); + for (auto& i : get_shape_infer_dependencies()) { + auto& dep = get_dependency(i); + auto dep_id = dep.id(); + if (memory_deps.count(i) > 0 || i >= get_dependencies().size()) { + continue; + } + + memory::ptr mem_ptr = nullptr; + if (dep.is_type()) { + mem_ptr = dep.as().get_attached_memory_ptr(); + } else if (dep.is_type()) { + mem_ptr = dep.as().get_attached_memory_ptr(); + } + + if (mem_ptr) { + memory_deps.insert({i, mem_ptr}); + } + } + return memory_deps; +} + +static size_t convert_to_raw_axis(size_t axis, size_t ndim) { + // convert between bfyx, bfzyx, bfzyxw and tensor.size.raw + if (axis >= ndim) { + throw std::runtime_error("axis should be less than ndim"); + } + + if (axis < 2) { + return axis; + } + return (ndim - 1) - (axis - 2); +} + static bool check_if_axis_is_set_properly(loop_node const & node) { const auto& input_primitive_maps = node.get_input_primitive_maps(); @@ -30,11 +67,11 @@ static bool check_if_axis_is_set_properly(loop_node const & node) { int32_t iteration_size = -1; for (const auto& pm : input_with_axis_iteration) { auto found = std::find_if(dependencies.begin(), dependencies.end(), - [&pm](const std::pair& dep){ return dep.first->id() == pm.get().external_id; }); + [&pm](const std::pair& dep){ return dep.first->id() == pm.get().external_id.pid; }); assert(found != dependencies.end()); const layout input_layout = (*found).first->get_output_layout(); const auto shape = input_layout.get_tensor().sizes(input_layout.format); - const size_t iteration_axis = node.convert_to_raw_axis(pm.get().axis, static_cast(shape.size())); + const size_t iteration_axis = convert_to_raw_axis(pm.get().axis, static_cast(shape.size())); if (iteration_size < 0) { iteration_size = shape[iteration_axis]; } else { @@ -48,7 +85,7 @@ static bool check_if_axis_is_set_properly(loop_node const & node) { for (const auto& input_ref : input_with_axis_iteration) { const loop::io_primitive_map& input = input_ref.get(); auto dep = std::find_if(dependencies.begin(), dependencies.end(), - [&input](const std::pair& dep) { return input.external_id == dep.first->id(); }); + [&input](const std::pair& dep) { return input.external_id.pid == dep.first->id(); }); // if corresponding external id is not found if (dep == dependencies.end()) { @@ -58,75 +95,104 @@ static bool check_if_axis_is_set_properly(loop_node const & node) { return true; } -static void validate_backedges(loop_node const & node) { - const auto& back_edges = node.get_back_edges(); - const auto& input_primitive_maps = node.get_input_primitive_maps(); +layout loop_inst::calc_output_layout(loop_node const& /*node*/, kernel_impl_params const& impl_param) { + auto prim = impl_param.typed_desc(); - // check input with iteration axis has backedge - for (const auto& back_edge : back_edges) { - for (const auto& mapping : input_primitive_maps) { - if (mapping.internal_id == back_edge.to && mapping.axis >= 0) { - CLDNN_ERROR_MESSAGE(node.id(), - "input with iteration axis should not have backedges"); - } - } - } -} + // finds internal output + const auto& output_primitive_maps = prim->output_primitive_maps; + const auto& output_mapping = output_primitive_maps.front(); -layout loop_inst::calc_output_layout(loop_node const & node, kernel_impl_params const& impl_param) { - // body program should be built here to calculate body input layout - // from outputs of loop's dependency and calculate loop output layout - // from the outputs of body program - if (!node.get_body_program()) { - const_cast(node).build_body_program(); - } + const auto& body_program = impl_param.inner_progs.front(); + const auto& body_outputs = body_program->get_outputs(); - // type checks - const primitive_id& num_iteration_id = node.get_num_iteration_id(); - if (!node.get_program().get_node(num_iteration_id).is_type()) { - CLDNN_ERROR_MESSAGE(node.id(), "num_iteration is not mutable_data"); + const primitive_id& output_internal_id = output_mapping.internal_id.pid; + auto target = std::find_if(body_outputs.begin(), body_outputs.end(), [&](const cldnn::program_node * output) { + return output->id() == output_internal_id; + }); + OPENVINO_ASSERT(target != body_outputs.end(), impl_param.desc->id, "output not found"); + + // set body output layout + layout loop_output_layout = (*target)->get_output_layout(); + const int64_t axis_to_iterate_through = output_mapping.axis; + if (axis_to_iterate_through != -1) { + const size_t ndim = loop_output_layout.get_rank(); + auto shape = loop_output_layout.get_dims(); + shape[axis_to_iterate_through] = static_cast(prim->max_num_iterations); + loop_output_layout.set_tensor(tensor(format::get_default_format(ndim), shape)); } - if (!check_if_axis_is_set_properly(node)) { - CLDNN_ERROR_MESSAGE(node.id(), "axis is not set properly"); - } + return loop_output_layout; +} +template +static std::vector get_output_layouts(kernel_impl_params const& impl_param, std::vector body_outputs, const int64_t num_iterations = -1) { + auto prim = impl_param.typed_desc(); + std::vector output_layouts; + + const auto& output_primitive_maps = prim->output_primitive_maps; + for (auto& output_mapping : output_primitive_maps) { + const primitive_id& output_internal_id = output_mapping.internal_id.pid; + auto target = std::find_if(body_outputs.begin(), body_outputs.end(), [&](const T output) { + return output->id() == output_internal_id; + }); + OPENVINO_ASSERT(target != body_outputs.end(), impl_param.desc->id, "output not found"); - // finds internal output - const auto& output_primitive_maps = node.get_output_primitive_maps(); - const auto& output_mapping = output_primitive_maps.front(); - const auto& body_outputs = node.get_body_program()->get_outputs(); - const primitive_id& output_internal_id = output_mapping.internal_id; - auto target = std::find_if(body_outputs.begin(), body_outputs.end(), [&](const cldnn::program_node * output) { - return output->id() == output_internal_id; - }); - layout loop_output_layout; - if (target == body_outputs.end()) { - CLDNN_ERROR_MESSAGE(impl_param.desc->id, "output not found"); - } else { // set body output layout - loop_output_layout = (*target)->get_output_layout(); - const int64_t axis_to_iterate_throgh = output_mapping.axis; - if (axis_to_iterate_throgh != -1) { - const size_t ndim = loop_output_layout.get_rank(); - auto shape = loop_output_layout.get_dims(); - shape[axis_to_iterate_throgh] = static_cast(node.get_max_iteration()); - loop_output_layout.set_tensor(tensor(format::get_default_format(ndim), shape)); + layout loop_output_layout = (*target)->get_output_layout(); + const int64_t axis_to_iterate_through = output_mapping.axis; + if (axis_to_iterate_through != -1) { + auto shape = loop_output_layout.get_partial_shape(); + shape[axis_to_iterate_through] = static_cast(num_iterations); + loop_output_layout.set_partial_shape(shape); } + output_layouts.push_back(loop_output_layout); } - return loop_output_layout; + return output_layouts; +} + +template +std::vector loop_inst::calc_output_layouts(loop_node const& /*node*/, kernel_impl_params const& impl_param) { + std::vector output_layouts; + auto prim = impl_param.typed_desc(); + if (impl_param.inner_nets.empty()) { + OPENVINO_ASSERT(impl_param.inner_progs.size() == 1, "Loop(", prim->id, ") should have only one inner network"); + const auto& body_outputs = impl_param.inner_progs.front()->get_outputs(); + output_layouts = get_output_layouts(impl_param, body_outputs); + } else { + auto& memory_deps = impl_param.memory_deps; + const size_t current_iteration_idx = 0; + OPENVINO_ASSERT(memory_deps.count(current_iteration_idx) > 0, "The count of memory deps(current_iteration) should not be zero"); + cldnn::mem_lock current_iterations_lock(memory_deps.at(current_iteration_idx), impl_param.get_stream()); + int64_t current_iteration = static_cast(*current_iterations_lock.data()); + GPU_DEBUG_LOG << "* current_iteration(" << memory_deps.at(current_iteration_idx) << ") : " << current_iteration << std::endl; + + OPENVINO_ASSERT(impl_param.inner_nets.size() == 1, "Loop(", prim->id, ") should have only one inner program"); + const auto& body_outputs = impl_param.inner_nets.front()->get_outputs(); + output_layouts = get_output_layouts>(impl_param, body_outputs, current_iteration); + } + return output_layouts; } +template std::vector loop_inst::calc_output_layouts(loop_node const& node, const kernel_impl_params& impl_param); + + std::string loop_inst::to_string(const loop_node & node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); + std::vector body_inputs; + { + for (auto& input : desc->body_program->get_inputs()) { + body_inputs.push_back(input->id()); + } + } + json_composite loop_info; - loop_info.add("body input id", desc->body.get_primitives_ids()); + loop_info.add("body input id", body_inputs); loop_info.add("trip_count_id", desc->trip_count_id); - loop_info.add("initial_execution_id", desc->initial_execution_id); - loop_info.add("current_iteration_id", desc->current_iteration_id); - loop_info.add("condition_id", desc->condition_id); + loop_info.add("first_execution_condition_id", desc->first_execution_condition_id); + loop_info.add("body_current_iteration_id", desc->body_current_iteration_id); + loop_info.add("body_execution_condition_id", desc->body_execution_condition_id); std::stringstream primitive_description; node_info->add("loop info", loop_info); @@ -142,23 +208,23 @@ static std::vector find_io_primitive_maps( std::vector ret; if (is_external) { for (const auto& it : input_primitive_maps) { - if (it.external_id == prim_id) { + if (it.external_id.pid == prim_id) { ret.push_back(&it); } } for (const auto& it : output_primitive_maps) { - if (it.external_id == prim_id) { + if (it.external_id.pid == prim_id) { ret.push_back(&it); } } } else { for (const auto& it : input_primitive_maps) { - if (it.internal_id == prim_id) { + if (it.internal_id.pid == prim_id) { ret.push_back(&it); } } for (const auto& it : output_primitive_maps) { - if (it.internal_id == prim_id) { + if (it.internal_id.pid == prim_id) { ret.push_back(&it); } } @@ -175,24 +241,19 @@ static void validate_mappings(loop_node const & node) { for (const auto& id : outer_inputs) { if (id == node.get_trip_count_id() || id == node.get_initial_execution_id() || - id == node.get_num_iteration_id()) { + id == node.get_num_iterations_id()) { continue; } const auto results = find_io_primitive_maps(node.get_input_primitive_maps(), node.get_output_primitive_maps(), id, true); - if (results.size() == 0) { - std::string msg = "outer input '" + id + "' does not have primitive map"; - CLDNN_ERROR_MESSAGE(node.id(), msg.c_str()); - } + OPENVINO_ASSERT(results.size() > 0, node.id(), " : outer input '", id, "' does not have primitive map"); } // check all io_primitive_maps have their corresponding external id for (const auto& pm : input_primitive_maps) { - auto found = std::find(outer_inputs.begin(), outer_inputs.end(), pm.external_id); - if (found == outer_inputs.end()) { - std::string msg = "external id '" + pm.external_id + "' in primitive map cannot be found loop inputs"; - CLDNN_ERROR_MESSAGE(node.id(), msg.c_str()); - } + auto found = std::find(outer_inputs.begin(), outer_inputs.end(), pm.external_id.pid); + OPENVINO_ASSERT(found != outer_inputs.end(), node.id(), + " : external id '", pm.external_id.pid, "' in primitive map cannot be found loop inputs"); } const auto& nodes = node.get_body_program()->get_processing_order(); @@ -200,50 +261,25 @@ static void validate_mappings(loop_node const & node) { // check all io_primitive_maps have their corresponding interal id for (const auto& pm : input_primitive_maps) { auto found = std::find_if(nodes.begin(), nodes.end(), [&pm](const program_node* body_input) { - return body_input->id() == pm.internal_id; + return body_input->id() == pm.internal_id.pid; }); - if (found == nodes.end()) { - std::string msg = "internal id '" + pm.internal_id + "' in primitive map cannot be found loop body"; - CLDNN_ERROR_MESSAGE(node.id(), msg.c_str()); - } + OPENVINO_ASSERT(found != nodes.end(), node.id(), + " : internal id '", pm.internal_id.pid, "' in primitive map cannot be found loop body"); } for (const auto& pm : output_primitive_maps) { auto found = std::find_if(nodes.begin(), nodes.end(), [&pm](const program_node* body_output) { - return body_output->id() == pm.internal_id; + return body_output->id() == pm.internal_id.pid; }); - if (found == nodes.end()) { - std::string msg = "internal id '" + pm.internal_id + "' in primitive map cannot be found body body"; - CLDNN_ERROR_MESSAGE(node.id(), msg.c_str()); - } + OPENVINO_ASSERT(found != nodes.end(), node.id(), + " : internal id '", pm.internal_id.pid, "' in primitive map cannot be found body body"); } } -void loop_inst::update_mapped_memory() { - if (!preproc_memories_done) { - return; - } - // update output memory - for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { - const auto& output_mapping = _output_primitive_maps.at(i); - const primitive_id& external_id = output_mapping.external_id; - const primitive_id& internal_id = output_mapping.internal_id; - memory::ptr to_mem = get_external_memory(external_id); - if (output_mapping.axis < 0) { - body_network->get_primitive(internal_id)->set_output_memory(to_mem); - } else { - for (auto& mem_mapping : concatenated_output_mem_mappings) { - if (mem_mapping.sliced_data_prim->id() == internal_id) { - mem_mapping.concatenated_mem = to_mem; - break; - } - } - } - } - // update input memory +void loop_inst::update_input_mapped_memory() { for (size_t memory_num = 0; memory_num < inputs_memory_count(); memory_num++) { const primitive_id& input_external_id = dependencies().at(memory_num).first->id(); auto input_map_ptrs = find_io_primitive_maps(_input_primitive_maps, - _output_primitive_maps, input_external_id, true); + _output_primitive_maps, input_external_id, true); if (input_map_ptrs.empty()) { if (input_external_id == _trip_count_id || input_external_id == _initial_execution_id) { @@ -257,36 +293,69 @@ void loop_inst::update_mapped_memory() { bool is_concatenated_input = (input_map->axis >= 0); if (is_concatenated_input) { for (auto& mem_mapping : concatenated_input_mem_mappings) { - if (mem_mapping.sliced_data_prim->id() == input_map->internal_id) { - mem_mapping.concatenated_mem = memory; + if (mem_mapping->sliced_data_prim->id() == input_map->internal_id.pid) { + mem_mapping->update_concatenated_mem(memory); break; } } } else { - body_network->set_input_data(input_map->internal_id, memory); + body_network->set_input_data(input_map->internal_id.pid, memory); } } } - //update backedges memory +} + +void loop_inst::update_output_mapped_memory() { + if (is_dynamic()) { + if (!outputs_allocated()) { + _outputs = allocate_outputs(_impl_params.get(), true, true); + } + } + + for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { + const auto& output_mapping = _output_primitive_maps.at(i); + const primitive_id& external_id = output_mapping.external_id.pid; + const size_t external_mem_idx = output_mapping.external_id.idx; + const primitive_id& internal_id = output_mapping.internal_id.pid; + const size_t internal_mem_idx = output_mapping.internal_id.idx; + + memory::ptr to_mem = get_external_memory(external_id, external_mem_idx); + if (to_mem) { + if (output_mapping.axis < 0) { + body_network->get_primitive(internal_id)->set_output_memory(to_mem, true, internal_mem_idx); + } else { + for (auto& mem_mapping : concatenated_output_mem_mappings) { + if (mem_mapping->sliced_data_prim->id() == internal_id) { + mem_mapping->update_concatenated_mem(to_mem); + break; + } + } + } + } + } +} + +void loop_inst::update_backedge_mapped_memory() { // checking if memory is a destination of a backedge for (const auto& back_edge : _back_edges) { //find corresponding input of the backedge const auto input_map_ptrs = find_io_primitive_maps(_input_primitive_maps, - _output_primitive_maps, back_edge.to, false); + _output_primitive_maps, back_edge.to, false); assert(input_map_ptrs.size() == 1); const auto& input_map = input_map_ptrs.front(); - auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from); + auto backedged_sliced_output = get_sliced_mem(back_edge.from); const auto backedge_to_prim = body_network->get_primitive(back_edge.to); const auto backedge_from_prim = body_network->get_primitive(back_edge.from); - memory::ptr initial_mem = get_external_memory(input_map->external_id); + + memory::ptr initial_mem = get_external_memory(input_map->external_id.pid, input_map->external_id.idx); for (auto& backedge_mapping : backedge_memory_mappings) { if (backedge_mapping.from_primitive->id() == backedge_from_prim->id() && backedge_mapping.to_primitive->id() == backedge_to_prim->id()) { - if (backedged_sliced_output_mems.empty()) { + if (backedged_sliced_output == nullptr) { // backedge output which does not need concatenation const auto output_mapping = find_io_primitive_maps(_input_primitive_maps, - _output_primitive_maps, back_edge.from, false); + _output_primitive_maps, back_edge.from, false); memory::ptr backedge_mem; if (output_mapping.empty()) { // from and to primitives in backedge are connected directly @@ -300,14 +369,15 @@ void loop_inst::update_mapped_memory() { backedge_mem = body_network->get_engine().allocate_memory(output_layout, 0); } } else { - backedge_mem = get_external_memory(output_mapping.front()->external_id); + auto external_id = output_mapping.front()->external_id; + backedge_mem = get_external_memory(external_id.pid, external_id.idx); } body_network->set_input_data(back_edge.to, backedge_mem); body_network->set_output_memory(back_edge.from, backedge_mem); - backedge_mapping.from_mems = { backedge_mem }; + backedge_mapping.from_mem = backedge_mem; backedge_mapping.initial_mem = initial_mem; } else { - backedge_mapping.from_mems = backedged_sliced_output_mems; + backedge_mapping.concat_mem_mapping = backedged_sliced_output; backedge_mapping.initial_mem = initial_mem; } break; @@ -316,92 +386,138 @@ void loop_inst::update_mapped_memory() { } } + +void loop_inst::update_mapped_memory() { + if (!preproc_memories_done) { + return; + } + + update_output_mapped_memory(); + update_input_mapped_memory(); + update_backedge_mapped_memory(); +} + event::ptr loop_inst::set_output_memory(memory::ptr mem, bool check, size_t idx) { auto ev = primitive_inst::set_output_memory(mem, check, idx); update_mapped_memory(); return ev; } -void loop_inst::preprocess_output_memory() { - auto& engine = _network.get_engine(); - concatenated_output_mem_mappings.reserve(_output_primitive_maps.size()); - for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { - const auto& output_mapping = _output_primitive_maps.at(i); - const primitive_id& external_id = output_mapping.external_id; - const primitive_id& internal_id = output_mapping.internal_id; - if (output_mapping.axis < 0) { - memory::ptr memory = get_external_memory(external_id); - body_network->get_primitive(internal_id)->set_output_memory(memory); +loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(const input_info& internal_id, + const cldnn::loop::io_primitive_map& io_prim_map, + memory::ptr mem_ptr, + const int64_t trip_count) { + auto& engine = body_network->get_engine(); + auto& stream = body_network->get_stream(); + auto prim = body_network->get_primitive(internal_id.pid); + const int64_t start = io_prim_map.start < 0? trip_count - 1: io_prim_map.start; + + std::vector sliced_mems; + int64_t num_elements_iteration = 0; + + // if memory is nullptr, that means memory is not allocated yet because current network is dynamic shape model. + // In dynamic model, we can't calculate num_element_iteration, start, and sliced_layout. + // will recalculate that parameters in backedge preprocessing map after first execution. + if (mem_ptr != nullptr) { + layout sliced_layout = prim->output_memory(internal_id.idx).get_layout(); + + // When trip_count is -1, allocate first sliced_mem and allocate sliced memory if additional sliced mem is required + if (trip_count < 0) { + memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); + sliced_mems.push_back(sliced_mem); } else { - memory::ptr to_mem = get_external_memory(external_id); - auto output_prim = body_network->get_primitive(internal_id); - layout sliced_layout = output_prim->output_memory().get_layout(); - - const int64_t max_iteration = _max_iteration; - std::vector sliced_mems; - sliced_mems.reserve(max_iteration); - for (int32_t j = 0; j < max_iteration; ++j) { + sliced_mems.reserve(trip_count); + for (int j=0; j < trip_count; ++j) { memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); sliced_mems.push_back(sliced_mem); } + } - const int64_t num_elements_batch = concatenated_memory_mapping::get_batch_size( - sliced_layout, output_mapping.axis); - const int64_t num_elements_iteration = sliced_layout.count() / num_elements_batch; - const int64_t start = output_mapping.start < 0? _max_iteration - 1: output_mapping.start; - concatenated_memory_mapping memory_mapping_info( - output_mapping.axis, std::move(to_mem), sliced_mems, _network.get_stream(), - num_elements_iteration, output_mapping.stride, start); - memory_mapping_info.sliced_data_prim = body_network->get_primitive(internal_id); - memory_mapping_info.concat_data_prim = get_network().get_primitive(external_id); - concatenated_output_mem_mappings.push_back(memory_mapping_info); + const int64_t num_elements_batch = concatenated_memory_mapping::get_batch_size( + sliced_layout, io_prim_map.axis); + num_elements_iteration = sliced_layout.count() / num_elements_batch; + } + + auto concat_memory_mapping = std::make_shared( + io_prim_map.axis, mem_ptr, sliced_mems, stream, + engine, num_elements_iteration, io_prim_map.stride, start); + concat_memory_mapping->sliced_data_prim = body_network->get_primitive(internal_id.pid); + return concat_memory_mapping; +} + +void loop_inst::preprocess_output_memory(const int64_t trip_count) { + if (concatenated_output_mem_mappings.empty()) + concatenated_output_mem_mappings.reserve(_output_primitive_maps.size()); + for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { + const auto& output_mapping = _output_primitive_maps.at(i); + const auto& external_id = output_mapping.external_id; + const auto& internal_id = output_mapping.internal_id; + GPU_DEBUG_LOG << i << ") output mapping - external " << external_id.to_string() << std::endl; + GPU_DEBUG_LOG << i << ") output mapping - internal " << internal_id.to_string() << std::endl; + + memory::ptr memory = get_external_memory(external_id.pid, external_id.idx); + if (output_mapping.axis < 0) { + // In dynamic model, Don't get output memory of loop node because body network's output layouts are not calculated + if (memory != nullptr) { + body_network->get_primitive(internal_id.pid)->set_output_memory(memory, true, internal_id.idx); + } + } else { + auto iter = std::find_if(concatenated_output_mem_mappings.begin(), concatenated_output_mem_mappings.end(), + [&](loop_inst::concatenated_memory_mapping::ptr concat_mem_map) -> bool { + return concat_mem_map->sliced_data_prim->id() == internal_id.pid; + }); + if (iter == concatenated_output_mem_mappings.end()) { + auto memory_mapping_info = create_concat_memory_map(internal_id, output_mapping, memory, trip_count); + memory_mapping_info->concat_data_prim = get_network().get_primitive(external_id.pid); + concatenated_output_mem_mappings.push_back(memory_mapping_info); + GPU_DEBUG_LOG << i << ") generate concat output memory mapping: " << memory_mapping_info->to_string() << std::endl; + } + GPU_DEBUG_IF(iter != concatenated_output_mem_mappings.end()) { + GPU_DEBUG_LOG << i << ") memory_mapping_info is already existed : " << (*iter)->to_string() << std::endl; + } } } } -void loop_inst::preprocess_input_memory() { - auto& engine = _network.get_engine(); - auto& iteration_mem = concatenated_input_mem_mappings; +void loop_inst::preprocess_input_memory(const int64_t trip_count) { for (size_t memory_num = 0; memory_num < inputs_memory_count(); memory_num++) { const primitive_id& input_external_id = dependencies().at(memory_num).first->id(); auto input_map_ptrs = find_io_primitive_maps(_input_primitive_maps, - _output_primitive_maps, input_external_id, true); + _output_primitive_maps, input_external_id, true); if (input_map_ptrs.size() == 0) { - if (input_external_id == _trip_count_id || - input_external_id == _initial_execution_id) { - continue; - } - CLDNN_ERROR_MESSAGE(id(), "loop primitive_map is incomplete"); + OPENVINO_ASSERT((input_external_id == _trip_count_id + || input_external_id == _num_iterations_id + || input_external_id == _initial_execution_id), + id(), "loop primitive_map is incomplete " + "input_external_id(", input_external_id, ") != _trip_count_id(", _trip_count_id, ")", + "input_external_id(", input_external_id, ") != _num_iterations_id(", _num_iterations_id, ")", + " && input_external_id(", input_external_id, ") != _initial_execution_id(", _initial_execution_id, ")"); + continue; } auto memory = input_memory_ptr(memory_num); for (size_t i = 0; i < input_map_ptrs.size(); ++i) { const auto input_map = input_map_ptrs.at(i); - bool is_concatenated_input = (input_map->axis >= 0); - if (is_concatenated_input) { - layout sliced_layout - = body_network->get_primitive(input_map->internal_id)->output_memory().get_layout(); - const int64_t max_iteration = _max_iteration; - std::vector sliced_mems; - sliced_mems.reserve(max_iteration); - for (int j=0; j < max_iteration; ++j) { - memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); - sliced_mems.push_back(sliced_mem); - } - const int64_t num_elements_batch = concatenated_memory_mapping::get_batch_size( - sliced_layout, input_map->axis); - const int64_t num_elements_iteration = sliced_layout.count() / num_elements_batch; - const int64_t start = input_map->start < 0? _max_iteration - 1: input_map->start; - concatenated_memory_mapping concatenated_input_mem_mapping_info( - input_map->axis, memory, sliced_mems, _network.get_stream(), - num_elements_iteration, input_map->stride, start); - concatenated_input_mem_mapping_info.sliced_data_prim = body_network->get_primitive(input_map->internal_id); - iteration_mem.push_back(concatenated_input_mem_mapping_info); + const auto& external_id = input_map->external_id; + const auto& internal_id = input_map->internal_id; + GPU_DEBUG_LOG << i << ") input mapping - external " << external_id.to_string() << std::endl; + GPU_DEBUG_LOG << i << ") input mapping - internal " << internal_id.to_string() << std::endl; + + if (input_map->axis >= 0) { + OPENVINO_ASSERT(trip_count > 0, "In preprocessing concat input mapping, trip_count should be positive"); + OPENVINO_ASSERT(memory != nullptr, "In preprocessing concat input mapping, concat memory should be allocated"); + auto memory_mapping_info = create_concat_memory_map(internal_id, *input_map, memory, trip_count); + concatenated_input_mem_mappings.push_back(memory_mapping_info); + GPU_DEBUG_LOG << i << ") generate concat input memory mapping: " << memory_mapping_info->to_string() << std::endl; } else { - if (memory->get_layout().data_type != body_network->get_primitive(input_map->internal_id)->output_memory().get_layout().data_type) { - CLDNN_ERROR_MESSAGE(id(), "incompatible datatypes"); + auto input_inst = body_network->get_primitive(internal_id.pid); + if (memory->get_layout() != input_inst->get_output_layout()) { + input_inst->set_output_layout(memory->get_layout()); + GPU_DEBUG_LOG << input_inst->id() << " is changed memory because layout is changed from " + << input_inst->get_output_layout().to_short_string() + << " to " << memory->get_layout().to_short_string() << std::endl; } - body_network->set_input_data(input_map->internal_id, memory); + body_network->set_input_data(internal_id.pid, memory); } } } @@ -409,88 +525,141 @@ void loop_inst::preprocess_input_memory() { void loop_inst::preprocess_backedge_memory() { // checking if memory is a destination of a backedge - for (const auto& back_edge : _back_edges) { + for (size_t idx = 0; idx < _back_edges.size(); idx++) { + const auto& back_edge = _back_edges[idx]; //find corresponding input of the backedge const auto input_map_ptrs = find_io_primitive_maps(_input_primitive_maps, - _output_primitive_maps, back_edge.to, false); + _output_primitive_maps, back_edge.to, false); const auto backedge_to_prim = body_network->get_primitive(back_edge.to); const auto backedge_from_prim = body_network->get_primitive(back_edge.from); memory::ptr initial_mem; - if (back_edge.to == _current_iteration_id) { - const layout current_iteration_layout = backedge_to_prim->output_memory().get_layout(); - initial_mem = get_network().get_engine().allocate_memory(current_iteration_layout); - auto& stream = get_network().get_stream(); - loop_node::write_scalar_value(initial_mem, stream, 0); - current_iteratoin_backedge_mapping_idx = backedge_memory_mappings.size(); + OPENVINO_ASSERT(!input_map_ptrs.empty(), id(), " has no input_mapping for backedged input"); + auto& external_id = input_map_ptrs.front()->external_id; + initial_mem = get_external_memory(external_id.pid, external_id.idx); + + GPU_DEBUG_LOG << idx << ") back_edge mapping - back_edge.from " << back_edge.from << std::endl; + GPU_DEBUG_LOG << idx << ") back_edge mapping - back_edge.to " << back_edge.to << std::endl; + + auto backedged_sliced_output = get_sliced_mem(back_edge.from); + const auto output_mapping = find_io_primitive_maps(_input_primitive_maps, + _output_primitive_maps, back_edge.from, false); + if (backedged_sliced_output != nullptr) { + // CONCAT_OUTPUT mode, backedge output which needs concatenation + backedge_memory_mappings.emplace_back( + backedge_from_prim, backedge_to_prim, backedged_sliced_output, initial_mem, body_network->get_stream()); + GPU_DEBUG_LOG << idx << ") add back_edge mapping with CONCAT_OUTPUT type, backedged_sliced_output(" + << backedged_sliced_output << "), initial_mem(" << initial_mem << ")" << std::endl; + } else if (output_mapping.empty() && backedge_to_prim == backedge_from_prim->dependencies().front().first) { + // SINGLE mode, from and to primitives in backedge are connected directly + backedge_memory_mappings.emplace_back( + backedge_from_prim, backedge_to_prim, initial_mem, body_network->get_stream()); + GPU_DEBUG_LOG << idx << ") add back_edge mapping with SINGLE type, initial_mem(" << initial_mem << ")" << std::endl; } else { - if (input_map_ptrs.empty()) { - CLDNN_ERROR_MESSAGE(id(), "no input_mapping for backedged input"); - } - initial_mem = get_external_memory(input_map_ptrs.front()->external_id); - } - - auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from); - if (backedged_sliced_output_mems.empty()) { - // backedge output which does not need concatenation - const auto output_mapping = find_io_primitive_maps(_input_primitive_maps, - _output_primitive_maps, back_edge.from, false); + // SINGLE_SHARED mode memory::ptr backedge_mem; - if (output_mapping.empty()) { - // from and to primitives in backedge are connected directly - if (backedge_to_prim == backedge_from_prim->dependencies().front().first) { - backedge_memory_mappings.emplace_back( - backedge_from_prim, backedge_to_prim, initial_mem, body_network->get_stream()); - continue; + auto output_prim = body_network->get_primitive(back_edge.from); + + if (is_dynamic()) { + if (output_prim->outputs_allocated()) { + auto internal_output_prim_mem = output_prim->output_memory_ptr(); + if (internal_output_prim_mem->get_layout() == initial_mem->get_layout()) { + backedge_mem = internal_output_prim_mem; + body_network->set_input_data(back_edge.to, backedge_mem); + GPU_DEBUG_LOG << idx << ") Get backedge_mem(" << backedge_mem + << ") from back_edge.from(" << back_edge.from << ")" << std::endl; + } else { + // When input layout is changed or backedge_mem is null + // because output layout of body network is not calculated yet, + // Set backedge_mem to nullptr and update it after first execution. + body_network->set_input_data(back_edge.to, initial_mem); + GPU_DEBUG_LOG << idx << ") Just set input data using initial_mem because back_edge.from(" + << back_edge.from << ") layout is changed or backedge_mem is nullptr" << std::endl; + } } else { - auto output_prim = body_network->get_primitive(back_edge.from); - layout output_layout = output_prim->output_memory().get_layout(); - backedge_mem = body_network->get_engine().allocate_memory(output_layout, 0); + body_network->set_input_data(back_edge.to, initial_mem); + GPU_DEBUG_LOG << idx << ") Just set input data using initial_mem because back_edge.from(" + << back_edge.from << ") has dynamic layout now" << std::endl; } } else { - backedge_mem = get_external_memory(output_mapping.front()->external_id); + if (output_mapping.empty()) { + backedge_mem = output_prim->output_memory_ptr(); + body_network->set_input_data(back_edge.to, backedge_mem); + GPU_DEBUG_LOG << idx << ") Get backedge_mem(" << backedge_mem + << ") from back_edge.from(" << back_edge.from << ")" << std::endl; + } else { + // Set input and output memory for body_network using external output memory of loop op + auto& out_mapping_ext_id = output_mapping.front()->external_id; + backedge_mem = get_external_memory(out_mapping_ext_id.pid, out_mapping_ext_id.idx); + GPU_DEBUG_LOG << idx << ") Get backedge_mem(" << backedge_mem << ") from output_mapping_external_id.pid(" + << out_mapping_ext_id.pid << ")" << std::endl; + + body_network->set_input_data(back_edge.to, backedge_mem); + body_network->set_output_memory(back_edge.from, backedge_mem); + } } - body_network->set_input_data(back_edge.to, backedge_mem); - body_network->set_output_memory(back_edge.from, backedge_mem); + backedge_memory_mappings.emplace_back( backedge_from_prim, backedge_to_prim, backedge_mem, initial_mem, body_network->get_stream()); - } else { - // backedge output which needs concatenation - backedge_memory_mappings.emplace_back( - backedge_from_prim, backedge_to_prim, backedged_sliced_output_mems, initial_mem, body_network->get_stream()); + GPU_DEBUG_LOG << idx << ") add back_edge mapping with SINGLE_SHARED type, backedge_mem(" + << backedge_mem << "), initial_mem(" << initial_mem << ")" << std::endl; } } } -std::vector loop_inst::get_sliced_mem(const primitive_id& internal_id) const { +std::shared_ptr loop_inst::get_sliced_mem(const primitive_id& internal_id) const { for (const auto& mem_mapping : concatenated_input_mem_mappings) { - if (mem_mapping.sliced_data_prim->id() == internal_id) { - return mem_mapping.sliced_mems; + if (mem_mapping->sliced_data_prim->id() == internal_id) { + return mem_mapping; } } for (const auto& mem_mapping : concatenated_output_mem_mappings) { - if (mem_mapping.sliced_data_prim->id() == internal_id) { - return mem_mapping.sliced_mems; + if (mem_mapping->sliced_data_prim->id() == internal_id) { + return mem_mapping; + } + } + return nullptr; // not found +} + +void loop_inst::validate_backedges(loop_node const & node) const { + const auto& back_edges = node.get_back_edges(); + const auto& input_primitive_maps = node.get_input_primitive_maps(); + + // check input with iteration axis has backedge + for (const auto& back_edge : back_edges) { + for (const auto& mapping : input_primitive_maps) { + OPENVINO_ASSERT((mapping.internal_id.pid != back_edge.to || mapping.axis < 0), + node.id(), ": input with iteration axis should not have backedges"); } } - return {}; // not found } -memory::ptr loop_inst::get_external_memory(const primitive_id& external_id) const { +memory::ptr loop_inst::get_external_memory(const primitive_id& external_id, size_t mem_idx) const { const auto outputPrim = _network.get_primitive(external_id); - return outputPrim->output_memory_ptr(); + if (outputPrim->outputs_allocated()) { + return outputPrim->output_memory_ptr(mem_idx); + } + return nullptr; +} + +layout loop_inst::get_external_output_layout(const primitive_id& external_id, size_t mem_idx) const { + const auto outputPrim = _network.get_primitive(external_id); + return outputPrim->get_output_layout(mem_idx); } loop_inst::typed_primitive_inst(network & network, loop_node const & node) : parent(network, node), - preproc_memories_done(false), - body_network(network::allocate_network(network.get_stream_ptr(), - node.get_body_program(), - false, - network.is_primary_stream())) { - if (!check_if_axis_is_set_properly(node)) - CLDNN_ERROR_MESSAGE(node.id(), "axis is not set properly"); - + preproc_memories_done(false), + body_network(network::allocate_network(network.get_stream_ptr(), + node.get_body_program(), + false, + network.is_primary_stream())) { + const primitive_id& num_iterations_id = node.get_num_iterations_id(); + OPENVINO_ASSERT(node.get_program().get_node(num_iterations_id).is_type(), + node.id(), ": num_iterations is not mutable_data"); + OPENVINO_ASSERT(check_if_axis_is_set_properly(node), node.id(), ": axis is not set properly"); + + set_inner_networks({body_network}); validate_backedges(node); validate_mappings(node); @@ -500,9 +669,8 @@ loop_inst::typed_primitive_inst(network & network, loop_node const & node) _trip_count_id = node.get_trip_count_id(); _initial_execution_id = node.get_initial_execution_id(); _current_iteration_id = node.get_current_iteration_id(); - _condition_id = node.get_condition_id(); - _num_iteration_id = node.get_num_iteration_id(); - _max_iteration = node.get_max_iteration(); + _condition_id = node.get_execution_condition_id(); + _num_iterations_id = node.get_num_iterations_id(); } void loop_inst::save(BinaryOutputBuffer& ob) const { @@ -514,8 +682,7 @@ void loop_inst::save(BinaryOutputBuffer& ob) const { ob << _initial_execution_id; ob << _current_iteration_id; ob << _condition_id; - ob << _num_iteration_id; - ob << _max_iteration; + ob << _num_iterations_id; body_network->save(ob); } @@ -529,9 +696,97 @@ void loop_inst::load(BinaryInputBuffer& ib) { ib >> _initial_execution_id; ib >> _current_iteration_id; ib >> _condition_id; - ib >> _num_iteration_id; - ib >> _max_iteration; + ib >> _num_iterations_id; body_network = std::make_shared(ib, get_network().get_stream_ptr(), get_network().get_engine(), get_network().is_primary_stream(), 0); } +void loop_inst::postprocess_output_memory(bool is_dynamic) { + if (is_dynamic) { + for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { + const auto& output_mapping = _output_primitive_maps.at(i); + const auto& external_id = output_mapping.external_id; + const auto& internal_id = output_mapping.internal_id; + if (output_mapping.axis < 0) { + auto internalOutputPrim = get_body_network()->get_primitive(internal_id.pid); + auto internal_mem = internalOutputPrim->output_memory_ptr(internal_id.idx); + if (internal_mem == nullptr) { + continue; + } + auto externalOutputPrim = _network.get_primitive(external_id.pid); + if (!externalOutputPrim->outputs_allocated()) { + externalOutputPrim->set_output_memory(internal_mem, external_id.idx); + } else { + auto external_mem = externalOutputPrim->output_memory_ptr(external_id.idx); + if (external_mem->get_layout() != internal_mem->get_layout()) { + externalOutputPrim->set_output_memory(internal_mem, external_id.idx); + } else if (external_mem != internal_mem) { + external_mem->copy_from(get_network().get_stream(), *internal_mem); + } + } + } else { + auto externalOutputPrim = _network.get_primitive(external_id.pid); + if (!externalOutputPrim->outputs_allocated() || shape_changed()) { + auto concat_layout = _impl_params->get_output_layout(external_id.idx); + auto concat_mem = _network.get_engine().allocate_memory(concat_layout, 0); + externalOutputPrim->set_output_memory(concat_mem, external_id.idx); + auto iter = std::find_if(concatenated_output_mem_mappings.begin(), + concatenated_output_mem_mappings.end(), + [&](std::shared_ptr &concat_output){ + return concat_output->concat_data_prim->id() == external_id.pid; + }); + if (iter != concatenated_output_mem_mappings.end()) { + (*iter)->update_concatenated_mem(concat_mem); + } + } + } + } + } + + for (size_t i = 0; i < concatenated_output_mem_mappings.size(); ++i) { + const auto& concat_output = concatenated_output_mem_mappings.at(i); + concat_output->restore_concatenated_mem(); + } +} + +void loop_inst::reset_memory() { + backedge_memory_mappings.clear(); + concatenated_input_mem_mappings.clear(); + for (auto concat_mem_map : concatenated_output_mem_mappings) { + concat_mem_map->reset_data_for_shape_changed(); + } +} + + +void loop_inst::update_output_layout() { + if (_node == nullptr) + return; + + auto memory_deps = _node->get_const_memory_deps(); + for (auto& i : _node->get_shape_infer_dependencies()) { + auto dep_id = _node->get_dependency(i).id(); + if (memory_deps.count(i) > 0 || i >= _node->get_dependencies().size()) { + continue; + } + + auto dep_mem = _network.get_output_memory(dep_id); + memory_deps.insert({i, dep_mem}); + } + _impl_params->memory_deps = memory_deps; + + auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params); + if (new_layouts.empty()) { + auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params); + new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[0], new_layout.data_padding); + _impl_params->output_layouts[0] = new_layout; + } else { + if (_impl_params->output_layouts.size() < new_layouts.size()) { + _impl_params->output_layouts.resize(new_layouts.size()); + } + for (size_t i = 0; i < new_layouts.size(); ++i) { + auto new_layout = new_layouts[i]; + new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[i], new_layout.data_padding); + _impl_params->output_layouts[i] = new_layout; + } + } +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 51b988076d18f3..240db96d5b4988 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -758,7 +758,10 @@ void network::reset_execution(bool wait) { get_stream().wait_for_events(events); } } - _events.clear(); + + // Move events to temporarily map to deallocate them at the end of network::execute() call for better overlapping with + // kernels execution, since it may take significant time for high amount of events + _old_events = std::move(_events); } event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) { @@ -1457,6 +1460,9 @@ void network::execute_impl(const std::vector& events) { // In scenarios with a big number of very small networks it can provide performance drop. get_stream().flush(); + // Deallocate events from the previos iteration + _old_events.clear(); + GPU_DEBUG_IF(debug_config->dump_runtime_memory_pool > 0) { get_memory_pool().dump(get_id()); } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index eea18ca1fe6b79..6c1e88de349115 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -425,7 +425,7 @@ event::ptr primitive_inst::realloc_if_needed() { auto current_shape = actual_layout.get_shape(); auto& sp = get_network().get_shape_predictor(); - auto dt_size = data_type_traits::size_of(actual_layout.data_type); + auto dt_size = ov::element::Type(actual_layout.data_type).bitwidth(); auto prealloc_info = sp.predict_preallocation_shape(id(), current_shape, dt_size, can_reuse_buffer); if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * dt_size)) { auto new_layout = actual_layout; @@ -685,6 +685,7 @@ void primitive_inst::do_runtime_skip_reorder() { GPU_DEBUG_TRACE_DETAIL << "[do runtime skip reorder] update shape for user " << u->id() << std::endl; u->update_shape(); u->update_shape_done_by_other = true; + if (u->_impl_params->get_input_layout() == u->_impl_params->get_output_layout()) { std::function>)> update_memory_dependencies; update_memory_dependencies = [&](std::vector> users) { @@ -699,6 +700,10 @@ void primitive_inst::do_runtime_skip_reorder() { update_memory_dependencies(u->get_user_insts()); u->set_can_be_optimized(true); + // Opt out reorder which has _needs_completion_event = true causes syncronization failed in dGPU. + if (_needs_completion_event == false && u->_needs_completion_event == true) { + _needs_completion_event = true; + } GPU_DEBUG_TRACE_DETAIL << "[do runtime skip reorder] set user " << u->id() << " as can_be_optimized" << std::endl; } else { GPU_DEBUG_TRACE_DETAIL << "[do runtime skip reorder] user " << u->id() << " cannot be optimized" << std::endl; diff --git a/src/plugins/intel_gpu/src/graph/prior_box.cpp b/src/plugins/intel_gpu/src/graph/prior_box.cpp index 899f0db6f2ba4a..571a2c6d92c218 100644 --- a/src/plugins/intel_gpu/src/graph/prior_box.cpp +++ b/src/plugins/intel_gpu/src/graph/prior_box.cpp @@ -401,12 +401,12 @@ void prior_box_node::calc_result() { // perform calculations if (get_output_layout().data_type == data_types::f16) - calculate_prior_box_output::type>(result, + calculate_prior_box_output::value_type>(result, get_program().get_stream(), input().get_output_layout(), *typed_desc()); else - calculate_prior_box_output::type>(result, + calculate_prior_box_output::value_type>(result, get_program().get_stream(), input().get_output_layout(), *typed_desc()); diff --git a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp index 684057ee4726f8..f8aa9fb1c08c60 100644 --- a/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/program_dump_graph.cpp @@ -188,6 +188,25 @@ void dump_graph_init(std::ofstream& graph, return out; }; + const auto dump_mem_preferred_info = [](const program_node* ptr) { + std::string out = ""; + auto input_fmts = ptr->get_preferred_input_fmts(); + if (!input_fmts.empty()) { + out += "preferred_in_fmt"; + for (auto& fmt : input_fmts) { + out += ":" + fmt_to_str(fmt); + } + } + auto output_fmts = ptr->get_preferred_output_fmts(); + if (!output_fmts.empty()) { + out += "\npreferred_out_fmt"; + for (auto& fmt : output_fmts) { + out += ":" + fmt_to_str(fmt); + } + } + + return out; + }; graph << "digraph cldnn_program {\n"; for (auto& node : program.get_processing_order()) { @@ -220,6 +239,7 @@ void dump_graph_init(std::ofstream& graph, } } graph << "\n" + dump_mem_info(node); + graph << "\n" + dump_mem_preferred_info(node); graph << "\""; #ifdef __clang__ #pragma clang diagnostic pop diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 62c11e2f7e8066..dc9b2029ff408c 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -208,7 +208,8 @@ std::unique_ptr program_node::desc_to_json() const { #endif impls.push_back(selected_impl->get_kernel_name()); - if (get_preferred_impl_type() == impl_types::ocl) { + auto preferred_impl_type = get_preferred_impl_type(); + if (preferred_impl_type != impl_types::onednn && preferred_impl_type != impl_types::cpu) { json_composite cl_dump_info; cl_dump_info.add("batch_hash", selected_impl->get_kernels_dump_info().first); cl_dump_info.add("kernel_entry", selected_impl->get_kernels_dump_info().second); @@ -331,6 +332,8 @@ layout program_node::get_non_padded_output_layout(bool invalidate_users_if_chang bool program_node::set_output_layout(layout& new_layout, bool invalidate_users_if_changed, size_t idx) { merge_output_padding(new_layout.data_padding, idx); + OPENVINO_ASSERT(idx < output_layouts.size(), id(), " has invalid index : index is ", std::to_string(idx), + " but output_layouts length is ", std::to_string(output_layouts.size())); new_layout.data_padding = output_layouts[idx].data_padding; bool changed = (new_layout != output_layouts[idx]); if (changed && invalidate_users_if_changed) // output_layout has changed! invalidate users diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 40d2b48d8edfaf..0f64b64a0bcad7 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -69,8 +69,9 @@ scatter_nd_update_inst::typed_primitive_inst(network& network, scatter_nd_update void scatter_nd_update_inst::on_execute() { auto input1_shape = _impl_params->input_layouts[1].get_partial_shape(); auto input2_shape = _impl_params->input_layouts[2].get_partial_shape(); + auto same_layouts = _impl_params->input_layouts[0] == _impl_params->output_layouts[0]; - if ((ov::shape_size(input1_shape.to_shape()) == 0) || (ov::shape_size(input2_shape.to_shape()) == 0)) + if (same_layouts && ((ov::shape_size(input1_shape.to_shape()) == 0) || (ov::shape_size(input2_shape.to_shape()) == 0))) reuse_input(); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index d4992801a80447..f6dacec4a73c80 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -120,7 +120,7 @@ KERNEL(fc)( uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; uint weights_offset = out_f * INPUT_ELEMENTS_COUNT; -#if COMPRESSED_WEIGHTS +#if COMPRESSED_WEIGHTS && DECOMPRESSION_SCALE_GROUPS_NUM == 1 #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD == 0 ACCUMULATOR_VEC_TYPE d_scale = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_scale, out_f); #elif DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD != 0 @@ -134,9 +134,11 @@ KERNEL(fc)( ACCUMULATOR_VEC_TYPE d_scale = decompression_scale[0]; #endif - #if !DECOMPRESSION_ZP_TERM - ACCUMULATOR_VEC_TYPE d_zp = 0; - #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD == 0 + ACCUMULATOR_TYPE* d_scales = (ACCUMULATOR_TYPE*)(&d_scale); +#endif + +#if COMPRESSED_WEIGHTS && DECOMPRESSION_ZP_TERM && DECOMPRESSION_ZP_GROUPS_NUM == 1 + #if DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD == 0 ACCUMULATOR_VEC_TYPE d_zp = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_zp, out_f); #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD != 0 ACCUMULATOR_VEC_TYPE d_zp = 0; @@ -148,9 +150,7 @@ KERNEL(fc)( #else ACCUMULATOR_VEC_TYPE d_zp = decompression_zp[0]; #endif - - ACCUMULATOR_TYPE* ds = (ACCUMULATOR_TYPE*)(&d_scale); - ACCUMULATOR_TYPE* dzp = (ACCUMULATOR_TYPE*)(&d_zp); + ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); #endif #if REALIGN_FP16_OFFSET @@ -193,7 +193,28 @@ KERNEL(fc)( ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); unroll_for(uint kii = 0; kii < TILE_K; ++kii) { unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { - w[kii * TILE_OFM + fi] = (w[kii * TILE_OFM + fi] - dzp[fi]) * ds[fi]; + const uint w_idx = kii * TILE_OFM + fi; + const uint offset_ofm = out_f + fi*SIMD + sglid; + #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 + const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; + ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #else + ACCUMULATOR_TYPE ds = d_scales[fi]; + #endif + + #if DECOMPRESSION_ZP_TERM + #if DECOMPRESSION_ZP_GROUPS_NUM > 1 + const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; + ACCUMULATOR_TYPE dzp = decompression_zp[zp_offset]; + #else + ACCUMULATOR_TYPE dzp = d_zps[fi]; + #endif + #else + ACCUMULATOR_TYPE dzp = ACCUMULATOR_VAL_ZERO; + #endif + w[w_idx] = (w[w_idx] - dzp) * ds; } } #endif @@ -230,7 +251,28 @@ KERNEL(fc)( ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); unroll_for(uint kii = 0; kii < TILE_K; ++kii) { unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { - w[kii * TILE_OFM + fi] = (w[kii * TILE_OFM + fi] - dzp[fi]) * ds[fi]; + const uint w_idx = kii * TILE_OFM + fi; + uint offset_ofm = out_f + fi*SIMD + get_sub_group_local_id(); + #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 + const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; + ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #else + ACCUMULATOR_TYPE ds = d_scales[fi]; + #endif + + #if DECOMPRESSION_ZP_TERM + #if DECOMPRESSION_ZP_GROUPS_NUM > 1 + const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; + ACCUMULATOR_TYPE dzp = decompression_zp[zp_offset]; + #else + ACCUMULATOR_TYPE dzp = d_zps[fi]; + #endif + #else + ACCUMULATOR_TYPE dzp = ACCUMULATOR_VAL_ZERO; + #endif + w[w_idx] = (w[w_idx] - dzp) * ds; } } #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl index 72e8d6d7d3d855..6374e65c4f5fcc 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl @@ -36,18 +36,24 @@ KERNEL(fc)( for (uint x = 0; x < INPUT0_SIZE_X; ++x) { const uint input0_idx = INPUT0_GET_INDEX(b, ofm, y, x); - const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); #if COMPRESSED_WEIGHTS - ACCUMULATOR_TYPE filter_compressed = TO_ACCUMULATOR_TYPE(weights[filter_idx]); #if DECOMPRESSION_ZP_TERM - ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[DECOMPRESSION_ZP_GET_INDEX_SAFE(0, oym, 0, 0)]); + const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(oym, y / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); #else ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; #endif - DECOMPRESSION_SCALE_TYPE scale = decompression_scale[DECOMPRESSION_SCALE_GET_INDEX_SAFE(0, oym, 0, 0)]; - ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - TO_ACCUMULATOR_TYPE(zp)) * scale; + const uint decomp_offset = DECOMPRESSION_SCALE_GET_INDEX_SAFE(oym, y / DECOMPRESSION_SCALE_GROUP_SIZE, 0, 0); + DECOMPRESSION_SCALE_TYPE scale = decompression_scale[decomp_offset]; + #endif + + #if COMPRESSED_WEIGHTS_INT8 + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); + ACCUMULATOR_TYPE filter_compressed = TO_ACCUMULATOR_TYPE(weights[filter_idx]); + ACCUMULATOR_TYPE filter_val = (filter_compressed - zp) * scale; dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); #else + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); #endif } @@ -67,19 +73,25 @@ KERNEL(fc)( for (uint x = 0; x < INPUT0_SIZE_X; ++x) { const uint input0_idx = INPUT0_GET_INDEX(b, ifm, y, x); - const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); #if COMPRESSED_WEIGHTS - FILTER_TYPE filter_compressed = weights[filter_idx]; #if DECOMPRESSION_ZP_TERM - ACCUMULATOR_TYPE zp = decompression_zp[DECOMPRESSION_ZP_GET_INDEX_SAFE(0, ofm, 0, 0)]; + const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(ofm, ifm / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); #else ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; #endif + const uint decomp_offset = DECOMPRESSION_SCALE_GET_INDEX_SAFE(ofm, ifm / DECOMPRESSION_SCALE_GROUP_SIZE, 0, 0); + DECOMPRESSION_SCALE_TYPE scale = decompression_scale[decomp_offset]; + #endif - DECOMPRESSION_SCALE_TYPE scale = decompression_scale[DECOMPRESSION_SCALE_GET_INDEX_SAFE(0, ofm, 0, 0)]; - ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - TO_ACCUMULATOR_TYPE(zp)) * scale; + + #if COMPRESSED_WEIGHTS_INT8 + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); + FILTER_TYPE filter_compressed = weights[filter_idx]; + ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - zp) * scale; dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); #else + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); #endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/group_normalization_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/group_normalization_gpu_ref.cl new file mode 100644 index 00000000000000..2715f90780071d --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/group_normalization_gpu_ref.cl @@ -0,0 +1,144 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/batch_headers/common.cl" + +#define NUM_CHANNELS_IN_GROUP (INPUT0_FEATURE_NUM / NUM_GROUPS) +#define CHANNEL_SIZE (INPUT0_BATCH_PITCH / INPUT0_FEATURE_NUM) +#define GROUP_SIZE (NUM_CHANNELS_IN_GROUP * CHANNEL_SIZE) + +#if MEAN_KERNEL_ENABLED || STANDARD_DEVIATION_KERNEL_ENABLED +inline void FUNC(kahan_summation)(INPUT0_TYPE elem, __private float* compensation, __private float* sum) { + if (isfinite(elem) && isfinite(*sum)) { + float temp = *sum + (elem - *compensation); + *compensation = (temp - *sum) - (elem - *compensation); + *sum = temp; + } else { + *sum += elem; + } +} +#endif + +#if MEAN_KERNEL_ENABLED + +KERNEL (calc_mean_ref)( __global INPUT0_TYPE* input + , __global float* output +#if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS +#endif +) +{ + const int batch = get_global_id(0); + if (batch >= INPUT0_BATCH_NUM) + return; + const int group = get_global_id(1); + const int feature_begin = group * NUM_CHANNELS_IN_GROUP; + const int feature_end = group * NUM_CHANNELS_IN_GROUP + NUM_CHANNELS_IN_GROUP; + float variance = 0.f, error = 0.f, mean_value = 0.f; + for (int feature = feature_begin; feature < feature_end; feature++) + { + if (feature >= INPUT0_FEATURE_NUM) + continue; +#if OUTPUT_DIMS > 4 + for (int z = 0; z < INPUT0_SIZE_Z; z++) +#endif + for (int y = 0; y < INPUT0_SIZE_Y; y++) + for (int x = 0; x < INPUT0_SIZE_X; x++) + { +#if OUTPUT_DIMS == 5 + size_t input_idx = INPUT0_GET_INDEX(batch, feature, z, y, x); +#elif OUTPUT_DIMS == 4 + size_t input_idx = INPUT0_GET_INDEX(batch, feature, y, x); +#endif + FUNC_CALL(kahan_summation)(input[input_idx], &error, &mean_value); + } + } + mean_value /= GROUP_SIZE; + output[batch * NUM_GROUPS + group] = mean_value; +} + +#elif STANDARD_DEVIATION_KERNEL_ENABLED + +KERNEL (calc_standard_deviation_ref)( __global INPUT0_TYPE* input + , __global float* mean + , __global float* output +#if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS +#endif +) +{ + const int batch = get_global_id(0); + if (batch >= INPUT0_BATCH_NUM) + return; + const int group = get_global_id(1); + const output_idx = batch * NUM_GROUPS + group; + const int feature_begin = group * NUM_CHANNELS_IN_GROUP; + const int feature_end = group * NUM_CHANNELS_IN_GROUP + NUM_CHANNELS_IN_GROUP; + float variance = 0.f, error = 0.f; + + for (int feature = feature_begin; feature < feature_end; feature++) + { + if (feature >= INPUT0_FEATURE_NUM) + continue; +#if OUTPUT_DIMS > 4 + for (int z = 0; z < INPUT0_SIZE_Z; z++) +#endif + for (int y = 0; y < INPUT0_SIZE_Y; y++) + for (int x = 0; x < INPUT0_SIZE_X; x++) + { +#if OUTPUT_DIMS == 5 + size_t input_idx = INPUT0_GET_INDEX(batch, feature, z, y, x); +#elif OUTPUT_DIMS == 4 + size_t input_idx = INPUT0_GET_INDEX(batch, feature, y, x); +#endif + FUNC_CALL(kahan_summation)(pow(input[input_idx] - mean[output_idx], 2), &error, &variance); + } + } + variance /= GROUP_SIZE; + float standard_deviation = sqrt(variance + EPSILON); + output[output_idx] = standard_deviation; +} +#elif NORMALIZE_KERNEL_ENABLED +KERNEL (normalize_ref)( __global INPUT0_TYPE* input + , __global INPUT0_TYPE* scale_values + , __global INPUT0_TYPE* bias_values + , __global float* mean_values + , __global float* standard_deviation_values + , __global OUTPUT_TYPE* output +#if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS +#endif +) +{ + const int batch = get_global_id(0); +#if OUTPUT_DIMS == 4 + const int feature = get_global_id(1); +#elif OUTPUT_DIMS == 5 + const int feature = get_global_id(1) / OUTPUT_SIZE_Z; + const int z = get_global_id(1) % OUTPUT_SIZE_Z; +#endif + const int y = get_global_id(2) / OUTPUT_SIZE_X; + const int x = get_global_id(2) % OUTPUT_SIZE_X; + const int group = feature / NUM_CHANNELS_IN_GROUP; + float mean = mean_values[batch * NUM_GROUPS + group]; + float standard_deviation = standard_deviation_values[batch * NUM_GROUPS + group]; +#if OUTPUT_DIMS == 4 + size_t output_idx = OUTPUT_GET_INDEX(batch, feature, y, x); +#elif OUTPUT_DIMS == 5 + size_t output_idx = OUTPUT_GET_INDEX(batch, feature, z, y, x); +#endif + OUTPUT_TYPE res = ((input[output_idx] - mean) / standard_deviation) * scale_values[feature] + bias_values[feature]; +#if HAS_FUSED_OPS + FUSED_OPS; + output[output_idx] = FUSED_OPS_RESULT; +#else + output[output_idx] = ACTIVATION(res, ACTIVATION_PARAMS); +#endif +} + +#endif + +#undef NUM_CHANNELS_IN_GROUP +#undef CHANNEL_SIZE +#undef GROUP_SIZE diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl index 9003f23ad1ec8d..052c6721a88141 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl @@ -280,6 +280,8 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 32); #elif defined INPUT0_LAYOUT_OS_IYX_OSV32__AI32 return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 32); +#elif defined INPUT0_LAYOUT_O_IS_YX_ISV2 + return GET_FILTER_O_IS_ZYX_ISV16_INDEX(INPUT0, o, i, 0, y, x, 2); #elif defined INPUT0_LAYOUT_O_IS_YX_ISV4 return GET_FILTER_O_IS_ZYX_ISV16_INDEX(INPUT0, o, i, 0, y, x, 4); #elif defined INPUT0_LAYOUT_O_IS_YX_ISV16 @@ -501,6 +503,8 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 32); #elif defined OUTPUT_LAYOUT_OS_IYX_OSV64 return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 64); +#elif defined OUTPUT_LAYOUT_O_IS_YX_ISV2 + return GET_FILTER_O_IS_ZYX_ISV16_INDEX(OUTPUT, o, i, 0, y, x, 2); #elif defined OUTPUT_LAYOUT_O_IS_YX_ISV4 return GET_FILTER_O_IS_ZYX_ISV16_INDEX(OUTPUT, o, i, 0, y, x, 4); #elif defined OUTPUT_LAYOUT_O_IS_YX_ISV16 diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/scatter_nd_update_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/scatter_nd_update_ref.cl index 4ec9b665760e34..8c48ad4d4e9979 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/scatter_nd_update_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/scatter_nd_update_ref.cl @@ -36,8 +36,10 @@ KERNEL(scatter_nd_update_ref)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* data, +#ifdef IS_SECOND_ITER const __global INPUT1_TYPE* indices, const __global INPUT2_TYPE* updates, +#endif __global OUTPUT_TYPE* output #if HAS_FUSED_OPS_DECLS , FUSED_OPS_DECLS @@ -56,8 +58,9 @@ KERNEL(scatter_nd_update_ref)(OPTIONAL_SHAPE_INFO_ARG const uint f = dim2 % OUTPUT_FEATURE_NUM; const uint b = dim2 / OUTPUT_FEATURE_NUM; + const uint input_idx = GET_UPDATES_INDEX(INPUT0, ORDER); const uint output_idx = GET_OUTPUT_INDEX(ORDER); - INPUT0_TYPE val = data[output_idx]; + INPUT0_TYPE val = data[input_idx]; #if HAS_FUSED_OPS FUSED_OPS_FIRST_KERNEL; output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT_FIRST_KERNEL); diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 148b6c10e39183..7706da6003fe74 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -32,6 +32,7 @@ enum class KernelType { RESHAPE, COUNT_NONZERO, GATHER_NONZERO, + GROUP_NORMALIZATION, PERMUTE, CONCATENATION, RESAMPLE, diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_base_opencl.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_base_opencl.cpp index d0d052b44c4ed3..b382561afdac34 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_base_opencl.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_base_opencl.cpp @@ -80,6 +80,10 @@ std::string KernelBaseOpenCL::GetEntryPoint(const std::string& templateName, // UniqueID = program_id + processing_index + additional weight/reorder tag kernelID += "_" + params.uniqueID + "_" + std::to_string(partID); + // Add "__sa" suffix for shape agnostic kernels + if (params.is_shape_agnostic) + kernelID += "__sa"; + return kernelID; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp index 6e192c92bfb808..8c6d2af2fd8f69 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp @@ -316,6 +316,7 @@ std::string toString(WeightsLayout layout) { case WeightsLayout::os_is_zyx_osv16_isv16: return "OS_IS_ZYX_OSV16_ISV16"; case WeightsLayout::os_is_zyx_osv32_isv16: return "OS_IS_ZYX_OSV32_ISV16"; case WeightsLayout::os_is_zyx_osv64_isv16: return "OS_IS_ZYX_OSV64_ISV16"; + case WeightsLayout::o_is_yx_isv2: return "O_IS_YX_ISV2"; case WeightsLayout::o_is_yx_isv4: return "O_IS_YX_ISV4"; case WeightsLayout::o_is_yx_isv16: return "O_IS_YX_ISV16"; case WeightsLayout::o_is_zyx_isv16: return "O_IS_ZYX_ISV16"; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp index 3e9eb35cdaaff0..a75d35469837f7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp @@ -24,11 +24,23 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par if (params.compressed) { jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS", 1)}); + if (params.weights.GetDType() == WeightsType::INT8 || params.weights.GetDType() == WeightsType::UINT8) { + jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS_INT8", 1)}); + } + + const size_t scale_groups_num = params.decompression_scale.Feature().v; + const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_TERM", 1)}); jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE", params.decompression_scale)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_GROUPS_NUM", scale_groups_num)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_GROUP_SIZE", scale_group_size)}); if (params.has_decompression_zp) { + const size_t zp_groups_num = params.decompression_zero_point.Feature().v; + const size_t zp_group_size = params.weights.IFM().v / params.decompression_zero_point.Feature().v; jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_TERM", 1)}); jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP", params.decompression_zero_point)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUPS_NUM", zp_groups_num)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUP_SIZE", zp_group_size)}); } } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 6b0407f6580cad..c272124627db23 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -241,6 +241,9 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, .Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 1, 1, 1, EXE_MODE_AGE_BASED)); } + if (params.compressed && batch == 1) + selector.Case(tune_params(1, std::min(max_tile_ofm, 2u), 4, 2, 1, 1, EXE_MODE_AGE_BASED)); + selector.Case([&](const fully_connected_params&) -> tune_params { tune_params result(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, EXE_MODE_DEFAULT); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_ref.cpp new file mode 100644 index 00000000000000..a6dd21c43fe4e3 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_ref.cpp @@ -0,0 +1,170 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "group_normalization_kernel_ref.h" +#include + +namespace kernel_selector { + +ParamsKey GroupNormalizationKernelRef::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableDifferentTypes(); + return k; +} + +static std::size_t InternalBufferSize(const group_normalization_params ¶ms) { + const auto& output = params.outputs[0]; + return output.Batch().v * params.num_groups * sizeof(float); +} + +static GroupNormalizationKernelRef::KernelId operator++(GroupNormalizationKernelRef::KernelId& id) { + id = static_cast(static_cast(id) + 1); + return id; +} + +GroupNormalizationKernelRef::DispatchData GroupNormalizationKernelRef::SetDefault( + KernelId id, const group_normalization_params ¶ms) const { + DispatchData dispatch_data; + auto& output = params.outputs[0]; + switch (id) { + case eCalcMeanKernel: + case eCalcStandardDeviationKernel: { + auto maxWorkGroupSize = params.engineInfo.maxWorkGroupSize; + dispatch_data.gws = std::vector{ + output.Batch().v, + static_cast(params.num_groups), + 1 + }; + dispatch_data.lws = std::vector{ + output.Batch().v * params.num_groups > maxWorkGroupSize ? maxWorkGroupSize / params.num_groups : output.Batch().v, + static_cast(params.num_groups), + 1}; + break; + } + case eNormalize: { + auto in_layout = params.inputs[0].GetLayout(); + auto out_layout = output.GetLayout(); + std::vector> dims_by_gws = { + { Tensor::DataChannelName::BATCH }, + { Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::Z }, + { Tensor::DataChannelName::X, Tensor::DataChannelName::Y }}; + dispatch_data.gws = std::vector{ + output.Batch().v, + output.Feature().v * output.Z().v, + output.X().v * output.Y().v}; + dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo, + in_layout, out_layout, dims_by_gws); + break; + } + default: + assert(false); + break; + } + return dispatch_data; +} + +JitConstants GroupNormalizationKernelRef::GetJitConstants(KernelId kernelId, + const group_normalization_params ¶ms) const { + auto jit = MakeBaseParamsJitConstants(params); + jit.AddConstant(MakeJitConstant("EPSILON", static_cast(params.epsilon))); + jit.AddConstant(MakeJitConstant("NUM_GROUPS", params.num_groups)); + switch (kernelId) { + case eCalcMeanKernel: + jit.AddConstant(MakeJitConstant("MEAN_KERNEL_ENABLED", true)); + break; + case eCalcStandardDeviationKernel: + jit.AddConstant(MakeJitConstant("STANDARD_DEVIATION_KERNEL_ENABLED", true)); + break; + case eNormalize: { + jit.AddConstant(MakeJitConstant("NORMALIZE_KERNEL_ENABLED", true)); + jit.AddConstant(MakeJitConstant("INPUT_INDICES_ORDER", "batch, feature, z, y, x")); + if (!params.fused_ops.empty()) { + FusedOpsConfiguration conf{ + "", + params.outputs[0].Dimentions() == 5 ? std::vector{"batch", "feature", "z", "y", "x"} : + std::vector{"batch", "feature", "y", "x"}, + "res", + params.outputs[0].GetDType() + }; + jit.Merge(MakeFusedOpsJitConstants(params, {conf})); + } + break; + } + default: + assert(false); + break; + } + return jit; +} + +void GroupNormalizationKernelRef::SetKernelArguments(const group_normalization_params& params, + KernelId kernelId, + cldnn::arguments_desc& arguments, + std::vector& internalBufferSizes) { + switch (kernelId) { + case eCalcMeanKernel: { + arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + internalBufferSizes.push_back(InternalBufferSize(params)); + break; + } + case eCalcStandardDeviationKernel: { + arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + internalBufferSizes.push_back(InternalBufferSize(params)); + break; + } + case eNormalize: { + arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + break; + } + default: + assert(false); + break; + } +} + +KernelsData GroupNormalizationKernelRef::GetKernelsData(const Params ¶ms, const optional_params &options) const { + const group_normalization_params& parameters = static_cast(params); + KernelData kd = KernelData::Default(params, eKernelsNum); + kd.internalBufferDataType = Datatype::F32; + for (KernelId id = eCalcMeanKernel; id < eKernelsNum; ++id) { + auto& kernel = kd.kernels[id]; + const auto entryPoint = GetEntryPoint(kernelName, parameters.layerID, params, options, id); + auto jitConstants = GetJitConstants(id, parameters); + const auto jit = CreateJit(kernelName, jitConstants, entryPoint); + const auto dispatchData = SetDefault(id, parameters); + FillCLKernelData(kernel, + dispatchData, + params.engineInfo, + kernelName, + jit, + entryPoint, + "", + false, + false, + 0, + 0, + 0); + SetKernelArguments(parameters, id, kernel.params.arguments, kd.internalBufferSizes); + } + return {kd}; +} + +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_ref.h new file mode 100644 index 00000000000000..0737c4d45089bf --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_ref.h @@ -0,0 +1,60 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "kernel_base_opencl.h" +#include "kernel_selector_params.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// GroupNormalizationParams +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct group_normalization_params : public base_params { + group_normalization_params() : base_params(KernelType::GROUP_NORMALIZATION) {} + + std::int64_t num_groups{}; + double epsilon{}; + + ParamsKey GetParamsKey() const override { + return base_params::GetParamsKey(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// group_normalization_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct group_normalization_optional_params : optional_params { + group_normalization_optional_params() : optional_params(KernelType::GROUP_NORMALIZATION) {} +}; + +class GroupNormalizationKernelRef : public KernelBaseOpenCL { +public: + using DispatchData = CommonDispatchData; + enum KernelId { + eCalcMeanKernel, + eCalcStandardDeviationKernel, + eNormalize, + eKernelsNum + }; + + GroupNormalizationKernelRef() : KernelBaseOpenCL{"group_normalization_gpu_ref"} {} + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + std::vector GetSupportedFusedOps() const override { + return { + FusedOpType::ACTIVATION, + FusedOpType::QUANTIZE, + FusedOpType::ELTWISE + }; + } + +protected: + DispatchData SetDefault(KernelId id, const group_normalization_params& params) const; + JitConstants GetJitConstants(KernelId kernelId, const group_normalization_params& params) const; + static void SetKernelArguments(const group_normalization_params& params, + KernelId kernelId, + cldnn::arguments_desc& arguments, + std::vector& internalBufferSizes); +}; + +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_selector.cpp new file mode 100644 index 00000000000000..40a5044c5216b5 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_selector.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "group_normalization_kernel_selector.h" +#include "group_normalization_kernel_ref.h" + +namespace kernel_selector { + +group_normalization_kernel_selector::group_normalization_kernel_selector() { + Attach(); +} + +KernelsData group_normalization_kernel_selector::GetBestKernels(const Params ¶ms, + const optional_params &options) const { + return GetNaiveBestKernel(params, options, KernelType::GROUP_NORMALIZATION); +} + +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_selector.h new file mode 100644 index 00000000000000..8e8579e61de41c --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/group_normalization/group_normalization_kernel_selector.h @@ -0,0 +1,19 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "kernel_selector.h" + +namespace kernel_selector { +class group_normalization_kernel_selector : public kernel_selector_base { +public: + static group_normalization_kernel_selector& Instance() { + static group_normalization_kernel_selector instance_; + return instance_; + } + + group_normalization_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_nd_update_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_nd_update_kernel_ref.cpp index 9fbe45f3da02a4..1680d39ca27bb6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_nd_update_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_nd_update_kernel_ref.cpp @@ -170,6 +170,10 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const kd.kernels[i].params.workGroups.global = dispatchData.gws; kd.kernels[i].params.workGroups.local = dispatchData.lws; kd.kernels[i].skip_execution = KernelData::SkipKernelExecution(prim_params); + + // Do not skip copy stage if output buffer is not empty or requires modification + if (i == 0 && prim_params.outputs[0].LogicalSize() != 0 && prim_params.outputs[0] != prim_params.inputs[0]) + kd.kernels[i].skip_execution = false; } }; @@ -178,6 +182,7 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const for (int i = 0; i < 2; i++) { auto dispatchData = SetDefault(newParams, (i == 1)); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i); + auto inputs_number = i == 0 ? 1 : 3; if (i == 1) { size_t input0_rank = newParams.inputs[0].LogicalDims().size(); @@ -213,7 +218,7 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const clKernelData& kernel = kd.kernels[i]; FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, - "", false, false, 3, GetFusedPrimitiveInputsCount(params), 1, newParams.has_dynamic_tensors()); + "", false, false, inputs_number, GetFusedPrimitiveInputsCount(params), 1, newParams.has_dynamic_tensors()); } return {kd}; diff --git a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp index b352059d850dea..3fcd03bdece0db 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp @@ -86,6 +86,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{ { WeightsLayout::os_iyx_osv32__ai32, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::os_iyx_osv64, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::os_iyx_osv16_rotate_180, { 0, 1, -1, 2, 3, -1 } }, + { WeightsLayout::o_is_yx_isv2, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::o_is_yx_isv4, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::o_is_yx_isv16, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::o_is_zyx_isv16, { 0, 1, 2, 3, 4, -1 } }, @@ -617,6 +618,10 @@ NDims WeightsTensor::GetSimpleDims(const std::vector& d, WeightsLayout l // TODO: It's not the right pitches. it's here in order to calculate physical size switch (l) { + case o_is_yx_isv2: + assert(newDims.size() == 4); + newDims[2] = RoundUp(newDims[2], 2); + break; case o_is_yx_isv4: assert(newDims.size() == 4); newDims[2] = RoundUp(newDims[2], 4); diff --git a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h index 3d54dfabade1c0..205b3198a7a103 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h +++ b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h @@ -94,6 +94,7 @@ enum WeightsLayout { oxiy, iyxo, yxio, + o_is_yx_isv2, o_is_yx_isv4, o_is_yx_isv16, o_is_zyx_isv16, @@ -621,6 +622,10 @@ struct TensorBaseT : public TensorBase { return same; } + bool operator!=(const TensorBaseT& t) const { + return !(*this == t); + } + bool SameDims(const TensorBaseT& t) const { bool same = dtype == t.dtype && layout == t.layout && dims.size() == t.dims.size(); if (same) { diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 3b8581a1e2e34e..fc04e35748fe6c 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -35,6 +35,20 @@ std::shared_ptr create_task_executor(const std::sh if (config.get_property(ov::internal::exclusive_async_requests)) { //exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); + } else if (config.get_property(ov::hint::enable_cpu_pinning)) { + auto executor_config = + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", + 0, + 0, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + 1, + 0, + 0, + ov::threading::IStreamsExecutor::Config::PreferredCoreType::BIG, + {{config.get_property(ov::num_streams), MAIN_CORE_PROC, 1, 0, 0}}, + true}; + auto post_config = ov::threading::IStreamsExecutor::Config::reserve_cpu_threads(executor_config); + return std::make_shared(post_config); } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); @@ -250,6 +264,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { // Configs ov::PropertyName{ov::enable_profiling.name(), PropertyMutability::RO}, + ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::model_priority.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::hint::host_task_priority.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO}, diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index d9b4e77314e600..c25726f673a2f8 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -22,6 +22,12 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ << ", num inputs: " << op->get_input_size() << std::endl; auto config = p.get_config(); + { + auto custom_outputs = config.get_property(ov::intel_gpu::custom_outputs); + if (!custom_outputs.empty()) { + config.set_property(ov::intel_gpu::custom_outputs(std::vector({}))); + } + } config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic())); @@ -61,10 +67,13 @@ static void CreateIfOp(ProgramBuilder& p, const std::shared_ptr& auto branch_true = gen_branch(p, op, idx_true); auto branch_false = gen_branch(p, op, idx_false); + const size_t num_outputs = op->get_output_size(); + const cldnn::condition conditionPrimitive(layerName, inputs, branch_true, - branch_false); + branch_false, + num_outputs); p.add_primitive(*op, conditionPrimitive); } diff --git a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp index b74d05e4f9aca6..b12536b10ccb9a 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp @@ -19,6 +19,8 @@ #include "openvino/op/roi_align.hpp" #include "openvino/op/variadic_split.hpp" #include "openvino/op/util/op_types.hpp" +#include "openvino/op/loop.hpp" +#include "openvino/op/tensor_iterator.hpp" #include "intel_gpu/primitives/data.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" @@ -206,6 +208,13 @@ static void CreateConstantOp(ProgramBuilder& p, const std::shared_ptr(outOp) || ov::is_type(outOp)) { consts[op].needsBatchInterpretation = constDims.size() == 1; + } else if ((ov::is_type(outOp) || ov::is_type(outOp))) { + // when inner network has 1d parameter which is connected to outer loop's constant 1d data, + // outer constant 1d data and inner 1d parameter has same bytes_count but layout is different + // (outer constant is [1, N, 1, 1] but inner parameter is [N, 1, 1, 1]). + // To pass check_memory_to_set in input_layout::set_data for this case, Set constDims to [N, 1, 1, 1] + // when constDims is one dim and user op is Loop or TensorIterator. + consts[op].needsBatchInterpretation = constDims.size() == 1; } } diff --git a/src/plugins/intel_gpu/src/plugin/ops/gather.cpp b/src/plugins/intel_gpu/src/plugin/ops/gather.cpp index 883ebaba1a6dc2..7d941375d5ae14 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/gather.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/gather.cpp @@ -119,6 +119,7 @@ void CreateGatherOpBase(ProgramBuilder& p, const std::shared_ptr& op, const i reordered_inputs[0], reordered_inputs[1], axis, + input_rank, out_shape, batch_dim, support_neg_ind); diff --git a/src/plugins/intel_gpu/src/plugin/ops/group_normalization.cpp b/src/plugins/intel_gpu/src/plugin/ops/group_normalization.cpp new file mode 100644 index 00000000000000..8c17d111331a67 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/ops/group_normalization.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/group_normalization.hpp" +#include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/primitives/group_normalization.hpp" + +namespace ov { +namespace intel_gpu { + +static void CreateGroupNormalizationOp(ProgramBuilder& p, const std::shared_ptr& op) { + validate_inputs_count(op, {3}); + auto inputs = p.GetInputInfo(op); + auto layerName = layer_type_name_ID(op); + cldnn::group_normalization groupNormalizationPrimitive { + layerName, + inputs[0], + inputs[1], + inputs[2], + op->get_num_groups(), + op->get_epsilon() + }; + p.add_primitive(*op, groupNormalizationPrimitive); +} + +REGISTER_FACTORY_IMPL(v12, GroupNormalization); + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index f44dddb26ba0e6..628b0d7c37d9aa 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -5,6 +5,7 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/plugin.hpp" +#include "openvino/op/tensor_iterator.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/util/sub_graph_base.hpp" @@ -20,13 +21,14 @@ #include using Loop = ov::op::v5::Loop; +using TensorIterator = ov::op::v0::TensorIterator; namespace ov { namespace intel_gpu { template -static DATA_TYPE CreateScalarData(ProgramBuilder &p, const cldnn::primitive_id& id, int64_t num) { - auto mem = p.get_engine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); +static DATA_TYPE CreateScalarData(ProgramBuilder &p, const cldnn::primitive_id& id, ov::Shape& shape, cldnn::data_types dtype, int64_t num) { + auto mem = p.get_engine().allocate_memory({ shape, dtype, cldnn::format::bfyx }); cldnn::mem_lock ptr{mem, p.get_engine().get_service_stream()}; *ptr.begin() = num; return {id, mem}; @@ -40,52 +42,37 @@ static cldnn::mutable_data CreateAdditionalOutputData(ProgramBuilder &p, const s const auto tensor = tensor_from_dims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); auto mem = p.get_engine().allocate_memory(output_layout); - auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, mem); // cldnn::data cannot set dependency + auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, std::move(mem)); // cldnn::data cannot set dependency return md; } -static void CreateLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { +static void SetLoopInputOutputMap(ProgramBuilder& p, + const std::shared_ptr& op, + cldnn::primitive::input_info_arr& inputs, + std::vector& input_primitive_maps, + std::vector& output_primitive_maps, + std::vector& back_edges_maps) { const std::string layerName = layer_type_name_ID(op); - auto inputs = p.GetInputInfo(op); const auto& loop_input_descs = op->get_input_descriptions(); const auto& loop_output_descs = op->get_output_descriptions(); const auto& body_inputs = op->get_function()->get_parameters(); const auto& body_outputs = op->get_function()->get_results(); - // Set special body ports: current_iteration input , execution condition output - auto special_body_ports = op->get_special_body_ports(); - - std::string body_current_iteration_id; - if (special_body_ports.current_iteration_input_idx >= 0) { - auto current_iteration_input = body_inputs.at(special_body_ports.current_iteration_input_idx); - body_current_iteration_id = layer_type_name_ID(current_iteration_input); - std::string input_name = ov::op::util::create_ie_output_name(current_iteration_input); - } - - cldnn::primitive_id body_execution_condition_id; - if (special_body_ports.body_condition_output_idx >= 0) { - auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0); - body_execution_condition_id = layer_type_name_ID(body_condition_output); - } - - // get body topology from ov::Model - ProgramBuilder body_program(op->get_function(), p.get_engine(), p.get_config(), true); - auto body_topology = *body_program.get_topology(); - - // setup input_primitive_maps/ output_primitive_maps and back_edges - std::vector input_primitive_maps; - std::vector output_primitive_maps; - std::vector back_edges; + bool use_new_shape_infer = p.use_new_shape_infer(); // set input mapping & back edges for (const auto& loop_input_desc : loop_input_descs) { - const cldnn::primitive_id& external_id = inputs.at(loop_input_desc->m_input_index).pid; + auto external_id = inputs.at(loop_input_desc->m_input_index); auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index); cldnn::primitive_id internal_id = layer_type_name_ID(body_input); + GPU_DEBUG_LOG << "loop_input_descs[" << layerName << "] = {m_input_index:" << loop_input_desc->m_input_index << "(external_id: " + << external_id << "), m_body_parameter_index:" << loop_input_desc->m_body_parameter_index + << "(internal_id: " << internal_id << ")}" << std::endl; + // set input mapping if (const auto& sliceInfo = - std::dynamic_pointer_cast(loop_input_desc)) { + std::dynamic_pointer_cast(loop_input_desc)) { // sliced input input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis, sliceInfo->m_start, sliceInfo->m_end, sliceInfo->m_stride); @@ -96,7 +83,7 @@ static void CreateLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { // set back edges if (const auto& mergedInput = - std::dynamic_pointer_cast(loop_input_desc)) { + std::dynamic_pointer_cast(loop_input_desc)) { // backedge const auto& to = body_inputs.at(mergedInput->m_body_parameter_index); const auto& from = body_outputs.at(mergedInput->m_body_value_index); @@ -104,81 +91,234 @@ static void CreateLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { cldnn::primitive_id to_id = layer_type_name_ID(to); cldnn::primitive_id from_id = layer_type_name_ID(from); - // reset output data type because the data types of the outputs of the - // body topology are always FP32 regardless of element type - { - const auto from_prim = body_topology.at(from_id); - const auto to_cldnn_type = cldnn::element_type_to_data_type(to->get_element_type()); - from_prim->output_data_types = {to_cldnn_type}; - } - back_edges.emplace_back(from_id, to_id); + back_edges_maps.emplace_back(from_id, to_id); } } - // set trip count, initial execution condition, num iteration primitives - // they should be mutable_data to prevent from being optimized out - const cldnn::primitive_id trip_count_id = layer_type_name_ID(op->get_input_node_shared_ptr(0)); - const cldnn::primitive_id execution_condition_id = layer_type_name_ID(op->get_input_node_shared_ptr(1)); - const int64_t num_iterations = op->get_num_iterations(); - if (num_iterations < 0) { - OPENVINO_THROW("loop's num_iteration cannot be negative"); + // set output mapping + if (use_new_shape_infer) { + for (const auto& loop_output_desc : loop_output_descs) { + cldnn::input_info external_input_info(layerName, loop_output_desc->m_output_index); + p.primitive_ids[layerName] = layerName; + + const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index); + cldnn::primitive_id internal_id = layer_type_name_ID(body_output); + + // update primitive_map + if (const auto& concatOutput = + std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires concatenation + output_primitive_maps.emplace_back(external_input_info, internal_id, concatOutput->m_axis, + concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride); + GPU_DEBUG_LOG << "loop_output_descs[" << layerName << "][ConcatOutputDescription] external:" + << external_input_info << ", internal:" + << internal_id << "(axis, start, end, stride)={" + << concatOutput->m_axis << "," << concatOutput->m_start << "," + << concatOutput->m_end << "," << concatOutput->m_stride << "}" << std::endl; + } + if (std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires no concatenation + output_primitive_maps.emplace_back(external_input_info, internal_id); + GPU_DEBUG_LOG << "loop_output_descs[" << layerName << "][BodyOutputDescription] external:" + << external_input_info << ", internal:" << internal_id << std::endl; + } + } + } else { + for (const auto& loop_output_desc : loop_output_descs) { + const uint64_t output_idx = loop_output_desc->m_output_index; + + // Add additional mutable_data for multiple outputs + // primitive ID should be . if output_idx > 0 + // otherwise primitive ID should be equals to TI primitive ID + const std::string layerNameWithIndex = layerName + ".out" + std::to_string(output_idx); + std::string external_id; + if (output_idx > 0) { + cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx); + p.add_primitive(*op, std::move(output_data)); + external_id = layerNameWithIndex; + } else { + p.primitive_ids[layerNameWithIndex] = layerName; + p.primitive_ids[layerName] = layerName; + external_id = layerName; + } + const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index); + cldnn::primitive_id internal_id = layer_type_name_ID(body_output); + + // update primitive_map + if (const auto& concatOutput = + std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires concatenation + output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis, + concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride); + GPU_DEBUG_LOG << "loop_output_descs[" << layerName << "][ConcatOutputDescription] external:" + << external_id << ", internal:" + << internal_id << "(axis, start, end, stride)={" + << concatOutput->m_axis << "," << concatOutput->m_start << "," + << concatOutput->m_end << "," << concatOutput->m_stride << "}" << std::endl; + } + if (std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires no concatenation + output_primitive_maps.emplace_back(external_id, internal_id); + GPU_DEBUG_LOG << "loop_output_descs[" << layerName << "][BodyOutputDescription] external:" + << external_id << ", internal:" << internal_id << std::endl; + } + } } - const cldnn::primitive_id num_iteration_id = layerName + "_numIteration"; - { - cldnn::mutable_data num_iteration = CreateScalarData(p, num_iteration_id, 0); - p.add_primitive(*op, std::move(num_iteration)); +} + +static std::vector GetOutputNames(const cldnn::primitive_id id, + const cldnn::primitive_id body_execution_condition_id, + const std::vector& output_primitive_maps, + const std::vector& back_edges) { + std::vector output_names; + OPENVINO_ASSERT(!output_primitive_maps.empty(), "[GPU] Output primitive map should have at least 1 mapping in primitive ", id); + for (auto out_map : output_primitive_maps) { + output_names.push_back(out_map.internal_id.pid); } - // set output mapping - for (const auto& loop_output_desc : loop_output_descs) { - const uint64_t output_idx = loop_output_desc->m_output_index; - - // Add additional mutable_data for multiple outputs - // primitive ID should be . if output_idx > 0 - // otherwise primitive ID should be equals to TI primitive ID - const std::string layerNameWithIndex = layerName + ".out" + std::to_string(output_idx); - std::string external_id; - if (output_idx > 0) { - cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx); - p.add_primitive(*op, std::move(output_data)); - external_id = layerNameWithIndex; - } else { - external_id = layerName; + // setup outputs for backedges + for (auto& back_edge : back_edges) { + auto iter = std::find(output_names.begin(), output_names.end(), back_edge.from); + // Do not add duplicated output name + if (iter == output_names.end()) { + output_names.push_back(back_edge.from); } - const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index); - cldnn::primitive_id internal_id = layer_type_name_ID(body_output); - - // update primitive_map - if (const auto& concatOutput = - std::dynamic_pointer_cast(loop_output_desc)) { - // output which requires concatenation - output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis, - concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride); + } + + // if execution_condition_id is specified, we need to add the id in build_option::outputs + if (!body_execution_condition_id.empty()) { + output_names.push_back(body_execution_condition_id); + } + + return output_names; +} + +static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& op, bool is_loop_op) { + const std::string layerName = layer_type_name_ID(op); + auto inputs = p.GetInputInfo(op); + bool is_dynamic = p.use_new_shape_infer() || op->is_dynamic(); + + int64_t num_iterations = op->get_num_iterations(); + OPENVINO_ASSERT((is_dynamic || num_iterations > 0), "loop's num_iteration should be positive on static shape model"); + + auto num_outputs = is_dynamic? op->get_output_size() : 1; + auto ov_model = op->get_function(); + + // Set special body ports: current_iteration input , execution condition output + cldnn::primitive_id body_current_iteration_id; + cldnn::primitive_id body_execution_condition_id; + cldnn::primitive_id trip_count_id; + cldnn::primitive_id first_execution_condition_id; + cldnn::primitive_id updated_current_iteration_id; + + std::shared_ptr current_iteration_input_op; + if (is_loop_op) { + auto loop_op = std::dynamic_pointer_cast(op); + auto special_body_ports = loop_op->get_special_body_ports(); + if (special_body_ports.current_iteration_input_idx >= 0) { + const auto& body_inputs = loop_op->get_function()->get_parameters(); + current_iteration_input_op = body_inputs.at(special_body_ports.current_iteration_input_idx); + body_current_iteration_id = layer_type_name_ID(current_iteration_input_op); } - if (std::dynamic_pointer_cast(loop_output_desc)) { - // output which requires no concatenation - output_primitive_maps.emplace_back(external_id, internal_id); + + if (special_body_ports.body_condition_output_idx >= 0) { + const auto& body_outputs = loop_op->get_function()->get_results(); + auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0); + body_execution_condition_id = layer_type_name_ID(body_condition_output); } + + trip_count_id = layer_type_name_ID(loop_op->get_input_node_shared_ptr(0)); + first_execution_condition_id = layer_type_name_ID(loop_op->get_input_node_shared_ptr(1)); + } + + // setup input_primitive_maps/ output_primitive_maps and back_edges + std::vector input_primitive_maps; + std::vector output_primitive_maps; + std::vector back_edges; + + SetLoopInputOutputMap(p, op, inputs, input_primitive_maps, output_primitive_maps, back_edges); + + auto shape = is_dynamic? ngraph::Shape{1} : ngraph::Shape{1, 1, 1, 1}; + auto prec = ngraph::element::i64; + if (current_iteration_input_op) { + current_iteration_input_op->set_output_type(0, prec, shape); + current_iteration_input_op->set_partial_shape(shape); + current_iteration_input_op->set_element_type(prec); + + auto increment_value_id = current_iteration_input_op->get_friendly_name() + "_inc"; + auto increment_value_op = std::make_shared(prec, shape, 1); + increment_value_op->set_friendly_name(increment_value_id); + + auto update_current_iter_op_id = current_iteration_input_op->get_friendly_name() + "_update"; + auto update_current_iter_op = std::make_shared(current_iteration_input_op, increment_value_op); + update_current_iter_op->set_friendly_name(update_current_iter_op_id); + updated_current_iteration_id = layer_type_name_ID(update_current_iter_op); + + auto result = std::make_shared(update_current_iter_op); + ov_model->add_results({result}); + } + + // set trip count, num iteration primitives + // they should be mutable_data to prevent from being optimized out + const cldnn::primitive_id num_iteration_id = layerName + "_numIteration"; + cldnn::mutable_data num_iteration_data = CreateScalarData(p, num_iteration_id, shape, prec, 0); + + p.add_primitive(*op, std::move(num_iteration_data)); + inputs.insert(inputs.begin(), cldnn::input_info(num_iteration_id, 0)); + + if (!body_current_iteration_id.empty()) { + // update input_primitive_maps and back_edges for current_iteration nodes + input_primitive_maps.emplace_back(cldnn::input_info(num_iteration_id), cldnn::input_info(body_current_iteration_id)); + back_edges.emplace_back(updated_current_iteration_id, body_current_iteration_id); } + auto output_names_vec = GetOutputNames(layerName, body_execution_condition_id, output_primitive_maps, back_edges); + + auto config = p.get_config(); + config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); + config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic)); + + // get body program from ov::Model + ProgramBuilder prog(ov_model, p.get_engine(), config, false, false, p.get_task_executor(), true); + auto body_program = prog.get_compiled_program(); + + GPU_DEBUG_LOG << "* trip_count_id : " << trip_count_id << std::endl; + GPU_DEBUG_LOG << "* num_iteration_id : " << num_iteration_id << std::endl; + GPU_DEBUG_LOG << "* body_current_iteration_id : " << body_current_iteration_id << std::endl; + GPU_DEBUG_LOG << "* first_execution_condition_id : " << first_execution_condition_id << std::endl; + GPU_DEBUG_LOG << "* body_execution_condition_id : " << body_execution_condition_id << std::endl; + const cldnn::loop loopPrimitive( - layerName, /* layer name of this primitive (output id) */ - inputs, /* inputs of this layer */ - body_topology, /* body network */ - trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */ - execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */ - num_iteration_id, /* actual number of iteration data in body network */ - input_primitive_maps, /* input mappings connecting outer network and inner network */ - output_primitive_maps, /* output mappings connecting outer network and inner network */ - back_edges, /* back edge mapping */ - num_iterations, /* max iteration, i.e. length of iteration axis */ + layerName, /* layer name of this primitive (output id) */ + inputs, /* inputs of this layer */ + body_program, /* body network */ + trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */ + first_execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */ + num_iteration_id, /* actual number of iteration data in body network */ + input_primitive_maps, /* input mappings connecting outer network and inner network */ + output_primitive_maps, /* output mappings connecting outer network and inner network */ + back_edges, /* back edge mapping */ + num_iterations, /* max iteration, i.e. length of iteration axis */ body_current_iteration_id, - body_execution_condition_id); + body_execution_condition_id, + num_outputs); p.add_primitive(*op, loopPrimitive); } +static void CreateLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { + CreateCommonLoopOp(p, op, true); +} + +/* The above code is a comment in C++ programming language. It is not doing anything in terms of code +execution. It is simply providing information or documentation about the code. */ +static void CreateTensorIteratorOp(ProgramBuilder& p, const std::shared_ptr& op) { + CreateCommonLoopOp(p, op, false); +} + REGISTER_FACTORY_IMPL(v5, Loop); +REGISTER_FACTORY_IMPL(v0, TensorIterator); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp b/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp deleted file mode 100644 index 21c7d3a8167a91..00000000000000 --- a/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/program_builder.hpp" -#include "intel_gpu/plugin/common_utils.hpp" -#include "intel_gpu/plugin/plugin.hpp" - -#include - -#include "openvino/op/tensor_iterator.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/util/sub_graph_base.hpp" - -#include "intel_gpu/primitives/loop.hpp" -#include "intel_gpu/primitives/mutable_data.hpp" -#include "intel_gpu/primitives/data.hpp" -#include "intel_gpu/primitives/reorder.hpp" -#include "intel_gpu/graph/topology.hpp" - -#include -#include - -using TensorIterator = ov::op::v0::TensorIterator; - -namespace ov { -namespace intel_gpu { - -template -static DATA_TYPE CreateScalarData(ProgramBuilder &p, const cldnn::primitive_id& id, int64_t num) { - auto mem = p.get_engine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); - cldnn::mem_lock ptr{mem, p.get_engine().get_service_stream()}; - *ptr.begin() = num; - return {id, mem}; -} - -static cldnn::mutable_data CreateAdditionalOutputData(ProgramBuilder &p, const std::shared_ptr& op, - const cldnn::primitive_id& id, const cldnn::primitive_id& input, - const int32_t output_idx) { - const auto precision = cldnn::element_type_to_data_type(op->get_output_element_type(output_idx)); - const auto format = cldnn::format::get_default_format(op->get_output_shape(output_idx).size()); - const auto tensor = tensor_from_dims(op->get_output_shape(output_idx)); - cldnn::layout output_layout = cldnn::layout(precision, format, tensor); - auto mem = p.get_engine().allocate_memory(output_layout); - auto md = cldnn::mutable_data(id, {cldnn::input_info(input)}, std::move(mem)); // cldnn::data cannot set dependency - return md; -} - -static void CreateTensorIteratorOp(ProgramBuilder &p, const std::shared_ptr &op) { - auto inputs = p.GetInputInfo(op); - - ProgramBuilder body_program(op->get_body(), p.get_engine(), p.get_config(), true); - auto body_topology = *body_program.get_topology(); - - // setup input_primitive_maps/ output_primitive_maps and back_edges - const auto& loop_input_descs = op->get_input_descriptions(); - const auto& loop_output_descs = op->get_output_descriptions(); - const auto& body_inputs = op->get_body()->get_parameters(); - const auto& body_outputs = op->get_body()->get_results(); - - std::vector input_primitive_maps; - std::vector output_primitive_maps; - std::vector back_edges; - std::map reordered_output_ids; - - // set input mapping & back edges - for (const auto& loop_input_desc : loop_input_descs) { - const cldnn::primitive_id& external_id = inputs.at(loop_input_desc->m_input_index).pid; - auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index); - cldnn::primitive_id internal_id = layer_type_name_ID(body_input); - - // set input mapping - if (const auto& sliceInfo = - std::dynamic_pointer_cast(loop_input_desc)) { - // sliced input - input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis, - sliceInfo->m_start, sliceInfo->m_end, sliceInfo->m_stride); - } else { - // input without slicing - input_primitive_maps.emplace_back(external_id, internal_id); - } - - // set back edges - if (const auto& mergedInput = - std::dynamic_pointer_cast(loop_input_desc)) { - // backedge - const auto& to = body_inputs.at(mergedInput->m_body_parameter_index); - const auto& from = body_outputs.at(mergedInput->m_body_value_index); - - cldnn::primitive_id to_id = layer_type_name_ID(to); - cldnn::primitive_id from_id = layer_type_name_ID(from); - - // reset output data type because the data types of the outputs of the - // body topology are always FP32 regardless of element type - { - const auto from_prim = body_topology.at(from_id); - const auto to_cldnn_type = cldnn::element_type_to_data_type(to->get_element_type()); - from_prim->output_data_types = {to_cldnn_type}; - } - back_edges.emplace_back(from_id, to_id); - } - } - - // set trip count, initial execution condition, num iteration primitives - // they should be mutable_data to prevent from being optimized out - std::string layerName = layer_type_name_ID(op); - const cldnn::primitive_id trip_count_id = layerName + "_tripCount"; - const int64_t num_iterations = op->get_num_iterations(); - if (num_iterations < 0) { - throw std::runtime_error("tensor iterator's num_iteration cannot be negative"); - } - { - cldnn::data trip_count = CreateScalarData(p, trip_count_id, num_iterations); - p.add_primitive(*op, trip_count); - } - const cldnn::primitive_id execution_condition_id = layerName + "_initialExecutionCondition"; - { - cldnn::mutable_data execution_condition = CreateScalarData(p, execution_condition_id, 1); - p.add_primitive(*op, std::move(execution_condition)); - } - const cldnn::primitive_id num_iteration_id = layerName + "_numIteration"; - { - cldnn::mutable_data num_iteration = CreateScalarData(p, num_iteration_id, 0); - p.add_primitive(*op, num_iteration); - } - - // set output mapping - for (const auto& loop_output_desc : loop_output_descs) { - const uint64_t output_idx = loop_output_desc->m_output_index; - - // Add additional mutable_data for multiple outputs - // primitive ID should be . if output_idx > 0 - // otherwise primitive ID should be equals to TI primitive ID - const std::string layerNameWithIndex = layerName + ".out" + std::to_string(output_idx); - std::string external_id; - if (output_idx > 0) { - cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx); - p.add_primitive(*op, std::move(output_data)); - external_id = layerNameWithIndex; - } else { - p.primitive_ids[layerNameWithIndex] = layerName; - p.primitive_ids[layerName] = layerName; - external_id = layerName; - } - const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index); - cldnn::primitive_id internal_id = layer_type_name_ID(body_output); - - // update primitive_map - if (const auto& concatOutput = - std::dynamic_pointer_cast(loop_output_desc)) { - // output which requires concatenation - output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis, - concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride); - } - if (std::dynamic_pointer_cast(loop_output_desc)) { - // output which requires no concatenation - output_primitive_maps.emplace_back(external_id, internal_id); - } - } - - const cldnn::loop loopPrimitive( - layerName, /* layer name of this primitive (output id) */ - inputs, /* inputs of this layer */ - body_topology, /* body network */ - trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */ - execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */ - num_iteration_id, /* actual number of iteration data in body network */ - input_primitive_maps, /* input mappings connecting outer network and inner network */ - output_primitive_maps, /* output mappings connecting outer network and inner network */ - back_edges, /* back edge mapping */ - num_iterations, /* max iteration, i.e. length of iteration axis */ - "", - ""); - - p.add_primitive(*op, loopPrimitive); -} - -REGISTER_FACTORY_IMPL(v0, TensorIterator); - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 55f4f5e7a42065..388269ddbb424d 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -593,6 +593,7 @@ std::vector Plugin::get_supported_properties() const { ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW}, ov::PropertyName{ov::device::id.name(), PropertyMutability::RW}, }; diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 404818ce92ce8c..a97b7e87a9e4b3 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -158,7 +158,7 @@ std::shared_ptr ProgramBuilder::build(const std::vector - -namespace ov { -namespace intel_gpu { - -void* USMHostAllocator::allocate(const size_t bytes, const size_t /* alignment */) noexcept { - try { - ov::AnyMap params = { ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::USM_HOST_BUFFER) }; - _usm_host_tensor = _context->create_tensor(ov::element::u8, {bytes}, params); - if (auto casted = std::dynamic_pointer_cast(_usm_host_tensor._ptr)) { - return casted->get_original_memory()->get_internal_params().mem; - } - return nullptr; - } catch (std::exception&) { - return nullptr; - } -} - -bool USMHostAllocator::deallocate(void* /* handle */, const size_t /* bytes */, size_t /* alignment */) noexcept { - try { - _usm_host_tensor = {nullptr, nullptr}; - } catch (std::exception&) { } - return true; -} - -bool USMHostAllocator::is_equal(const USMHostAllocator& other) const { - return other._usm_host_tensor != nullptr && _usm_host_tensor != nullptr && other._usm_host_tensor._ptr == _usm_host_tensor._ptr; -} -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_context.cpp b/src/plugins/intel_gpu/src/plugin/remote_context.cpp index 1b932226881db3..e4aefa00bb0f0f 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_context.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp @@ -6,7 +6,7 @@ #include "openvino/runtime/make_tensor.hpp" #include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/plugin/remote_tensor.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" +#include "intel_gpu/plugin/usm_host_tensor.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/device_query.hpp" #include @@ -111,8 +111,7 @@ std::shared_ptr RemoteContextImpl::get_this_shared_ptr() { ov::SoPtr RemoteContextImpl::create_host_tensor(const ov::element::Type type, const ov::Shape& shape) { if (m_engine->use_unified_shared_memory()) { - USMHostAllocator allocator(get_this_shared_ptr()); - return { ov::make_tensor(type, shape, allocator), nullptr }; + return { std::make_shared(get_this_shared_ptr(), type, shape), nullptr }; } else { return { ov::make_tensor(type, shape), nullptr }; } diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index a7c68cd8f81107..cd1011ea153bfe 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -2,17 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/plugin/remote_tensor.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/plugin/plugin.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "intel_gpu/runtime/memory_caps.hpp" #include namespace ov { namespace intel_gpu { +TensorType RemoteTensorImpl::allocation_type_to_tensor_type(cldnn::allocation_type t) { + switch (t) { + case cldnn::allocation_type::cl_mem: return TensorType::BT_BUF_INTERNAL; + case cldnn::allocation_type::usm_host: return TensorType::BT_USM_HOST_INTERNAL; + case cldnn::allocation_type::usm_device: return TensorType::BT_USM_DEVICE_INTERNAL; + default: return TensorType::BT_EMPTY; + } + + return TensorType::BT_EMPTY; +} + RemoteTensorImpl::RemoteTensorImpl(RemoteContextImpl::Ptr context, const ov::Shape& shape, const ov::element::Type& element_type, @@ -28,20 +40,8 @@ RemoteTensorImpl::RemoteTensorImpl(RemoteContextImpl::Ptr context, , m_mem(mem) , m_surf(surf) , m_plane(plane) { - if (supports_caching()) { - m_hash = cldnn::hash_combine(0, m_mem); - m_hash = cldnn::hash_combine(m_hash, m_surf); - m_hash = cldnn::hash_combine(m_hash, plane); - m_hash = cldnn::hash_combine(m_hash, m_shape.size()); - m_hash = cldnn::hash_combine(m_hash, element_type.hash()); - for (const auto& d : m_shape) { - m_hash = cldnn::hash_combine(m_hash, d); - } - } - - update_strides(); + update_hash(); allocate(); - init_properties(); } RemoteTensorImpl::~RemoteTensorImpl() { @@ -82,12 +82,15 @@ const AnyMap& RemoteTensorImpl::get_properties() const { m_shape = shape; if (ov::shape_size(shape) > m_memory_object->count()) { - OPENVINO_ASSERT(!is_shared(), "Cannot call setShape for Tensor created on top of preallocated memory if shape was increased."); + GPU_DEBUG_TRACE_DETAIL << "Remote realloc" << std::endl; + OPENVINO_ASSERT(!is_shared(), "Cannot call set_shape for Tensor created on top of preallocated memory if shape was increased."); if (!deallocate()) { - OPENVINO_THROW("Cannot deallocate tensor while an attempt to enlarge tensor area in setShape."); + OPENVINO_THROW("Cannot deallocate tensor while an attempt to enlarge tensor area in set_shape."); } allocate(); + } else { + update_strides(); } } @@ -108,23 +111,39 @@ void RemoteTensorImpl::allocate() { if (enable_caching) { m_memory_object = context->try_get_cached_memory(m_hash); - if (m_memory_object) + if (m_memory_object) { + update_properties(); + update_strides(); return; + } } auto& engine = context->get_engine(); + // Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty + // W/A for this issue: + // Allocate with non-empty shape and then reinterprete with original shape + auto shape_copy = m_shape; + for (auto &i : shape_copy) { + if (i == 0) + i = 1; + } + + m_layout.set_partial_shape(shape_copy); + + const bool reset = false; + switch (m_mem_type) { case TensorType::BT_BUF_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem); + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem, reset); break; } case TensorType::BT_USM_HOST_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host); + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host, reset); break; } case TensorType::BT_USM_DEVICE_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device); + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device, reset); break; } case TensorType::BT_BUF_SHARED: { @@ -161,6 +180,9 @@ void RemoteTensorImpl::allocate() { m_memory_object.reset(); } + update_properties(); + update_strides(); + if (enable_caching) context->add_to_cache(m_hash, m_memory_object); } @@ -181,6 +203,19 @@ bool RemoteTensorImpl::supports_caching() const { return is_shared(); } +void RemoteTensorImpl::update_hash() { + if (supports_caching()) { + m_hash = cldnn::hash_combine(0, m_mem); + m_hash = cldnn::hash_combine(m_hash, m_surf); + m_hash = cldnn::hash_combine(m_hash, m_plane); + m_hash = cldnn::hash_combine(m_hash, m_shape.size()); + m_hash = cldnn::hash_combine(m_hash, m_element_type.hash()); + for (const auto& d : m_shape) { + m_hash = cldnn::hash_combine(m_hash, d); + } + } +} + bool RemoteTensorImpl::is_surface() const noexcept { return m_mem_type == TensorType::BT_SURF_SHARED || m_mem_type == TensorType::BT_IMG_SHARED || @@ -196,11 +231,24 @@ cldnn::memory::ptr RemoteTensorImpl::get_original_memory() const { return m_memory_object; } +void RemoteTensorImpl::set_memory(cldnn::memory::ptr memory, size_t actual_size) { + auto engine = m_memory_object->get_engine(); + m_layout = memory->get_layout(); + m_shape = m_layout.get_shape(); + + auto actual_layout = m_layout; + actual_layout.set_partial_shape({ov::Dimension(actual_size)}); + m_memory_object = engine->reinterpret_buffer(*memory, actual_layout); + + update_properties(); + update_strides(); +} + std::shared_ptr RemoteTensorImpl::get_context() const { return m_context; } -void RemoteTensorImpl::init_properties() { +void RemoteTensorImpl::update_properties() { OPENVINO_ASSERT(is_allocated(), "[GPU] Can't initialize RemoteTensorImpl parameters as memory was not allocated"); auto params = m_memory_object->get_internal_params(); diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 6e9e8bbf353803..9c097d222fdc1b 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/plugin/usm_host_tensor.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/core/preprocess/input_tensor_info.hpp" #include "openvino/core/parallel.hpp" @@ -10,7 +11,6 @@ #include "intel_gpu/plugin/sync_infer_request.hpp" #include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/plugin/remote_tensor.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/plugin/variable_state.hpp" @@ -41,6 +41,15 @@ inline bool can_use_usm_host(const cldnn::engine& engine) { return can_use_usm; } +inline ov::Shape get_tensor_shape(const ov::PartialShape& pshape) { + ov::Shape res(pshape.size()); + for (size_t i = 0; i < pshape.size(); i++) { + res[i] = pshape[i].is_dynamic() ? 0 : pshape[i].get_length(); + } + + return res; +} + inline std::string get_port_name(const ov::Output& port, const bool is_legacy_api) { std::string name; // TODO: Should use tensor name as the port name, but many legacy tests still use legacy name @@ -72,7 +81,7 @@ void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_p return; if (src_et == dst_et) { - std::memcpy(dst_ptr, src_ptr, size); + std::memcpy(dst_ptr, src_ptr, size * src_et.size()); return; } @@ -167,11 +176,10 @@ bool same_host_mem(cldnn::memory::cptr memory, const uint8_t* host_ptr) { } ov::Shape predict_shape(const std::string& name, const ov::Shape current_shape, ov::element::Type element_type, cldnn::ShapePredictor& shape_predictor) { - auto et_size = cldnn::ceil_div(element_type.bitwidth(), 8); - auto prealloc_info = shape_predictor.predict_preallocation_shape(name, current_shape, et_size, false); + auto prealloc_info = shape_predictor.predict_preallocation_shape(name, current_shape, element_type.bitwidth(), false); const auto& preallocation_shape = prealloc_info.second; auto can_preallocate_buffer = prealloc_info.first && - shape_predictor.can_preallocate(ov::shape_size(preallocation_shape) * et_size); + shape_predictor.can_preallocate(cldnn::ceil_div(ov::shape_size(preallocation_shape) * element_type.bitwidth(), 8)); if (can_preallocate_buffer) { return preallocation_shape; } @@ -426,6 +434,7 @@ void SyncInferRequest::wait() { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::reinterpret_memory"); OPENVINO_ASSERT(!output_memory->get_layout().data_padding, "[GPU] Unexpected padding in output buffer"); output_memory = m_graph->get_engine().reinterpret_buffer(*output_memory, output_layout); + GPU_DEBUG_TRACE_DETAIL << name << " model output: " << output_memory->buffer_ptr() << std::endl; } OPENVINO_ASSERT(m_user_outputs.count(name) > 0, "[GPU] Output ", name, " is not found in output tensors map"); @@ -434,6 +443,12 @@ void SyncInferRequest::wait() { auto remote_ptr = std::dynamic_pointer_cast(output_tensor); bool is_remote = remote_ptr != nullptr; + if (is_remote) { + GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (remote): " << remote_ptr->get_original_memory()->buffer_ptr() << std::endl; + } else { + GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (host): " << output_tensor->data() << std::endl; + } + bool need_output_update = output_layout.bytes_count() == 0 || (output_memory && output_tensor->get_byte_size() != output_memory->size()); if (need_output_update) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::update_output"); @@ -445,6 +460,19 @@ void SyncInferRequest::wait() { OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); } + if (port.get_partial_shape().is_dynamic()) { + bool need_reallocate = true; + auto usm_host_tensor = std::dynamic_pointer_cast(output_tensor); + if (usm_host_tensor && output_memory) + need_reallocate = usm_host_tensor->get_impl()->get_original_memory()->size() < output_memory->size(); + + if (need_reallocate) { + auto& shape_predictor = m_graph->get_network()->get_shape_predictor(); + auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), shape_predictor); + output_tensor->set_shape(actual_memory_shape); + } + } + output_tensor->set_shape(mem_shape); } @@ -454,6 +482,8 @@ void SyncInferRequest::wait() { auto dst_ptr = static_cast(output_tensor->data()); bool same_mem = same_host_mem(output_memory, dst_ptr); if (!same_mem && output_memory->size()) { + GPU_DEBUG_TRACE_DETAIL << name << " copy from: " << output_memory->buffer_ptr() << " to " + << (!is_remote ? output_tensor->data() : remote_ptr->get_original_memory()->buffer_ptr()) << std::endl; if (auto ev = copy_output_data(output_memory, *output_tensor)) { copy_events.push_back(ev); } @@ -493,22 +523,13 @@ void SyncInferRequest::setup_stream_graph() { std::shared_ptr SyncInferRequest::create_host_tensor(const ov::PartialShape& port_shape, const ov::element::Type& port_element_type) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::create_host_tensor"); - // Disable USM usage as USMHostAllocator may fail for attempt to allocate 0 bytes - // If we add WA for such case to avoid driver call, then deallocate method will return false and Blob::setShape call will throw an exception - bool use_usm = m_graph->get_engine().use_unified_shared_memory() && !port_shape.is_dynamic(); - - auto shape = port_shape.is_static() ? port_shape.to_shape() : ov::Shape(port_shape.size(), 0); - auto usm_allocator = USMHostAllocator(m_context); - return use_usm ? ov::make_tensor(port_element_type, shape, usm_allocator) - : ov::make_tensor(port_element_type, shape); + return m_context->create_host_tensor(port_element_type, get_tensor_shape(port_shape))._ptr; } -std::shared_ptr SyncInferRequest::create_device_tensor(const ov::Shape& shape, ov::element::Type element_type, - bool need_lockable_memory, void* mem_ptr) const { +std::shared_ptr SyncInferRequest::create_device_tensor(const ov::PartialShape& port_shape, ov::element::Type element_type, + bool need_lockable_memory) const { TensorType tensor_type = TensorType::BT_EMPTY; - if (mem_ptr) { - tensor_type = TensorType::BT_USM_SHARED; - } else if (m_graph->get_engine().use_unified_shared_memory()) { + if (m_graph->get_engine().use_unified_shared_memory()) { tensor_type = need_lockable_memory ? TensorType::BT_USM_HOST_INTERNAL : TensorType::BT_USM_DEVICE_INTERNAL; } else { tensor_type = TensorType::BT_BUF_INTERNAL; @@ -518,24 +539,10 @@ std::shared_ptr SyncInferRequest::create_device_tensor(const ov::Sh if (!can_use_usm_host(m_graph->get_engine()) && need_lockable_memory) tensor_type = TensorType::BT_BUF_INTERNAL; - // Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty - // W/A for this issue: - // Allocate with non-empty shape and then reinterprete with original shape - auto shape_copy = shape; - for (auto &i : shape_copy) { - if (i == 0) - i = 1; - } - return std::make_shared(m_context, - shape_copy, + get_tensor_shape(port_shape), element_type, - tensor_type, - mem_ptr); -} - -std::shared_ptr SyncInferRequest::create_shared_device_tensor(const ov::Shape& shape, ov::element::Type element_type, void* usm_host_mem) const { - return create_device_tensor(shape, element_type, false, usm_host_mem); + tensor_type); } TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrapper& user_tensor_wrapper, @@ -547,17 +554,12 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe auto tensor_shape = user_tensor->get_shape(); bool is_dynamic = port_pshape.is_dynamic(); OPENVINO_ASSERT(std::dynamic_pointer_cast(user_tensor) == nullptr, "[GPU] Unexpected remote tensor"); - auto input_ptr = user_tensor->data(); - const auto alloc_type = m_graph->get_engine().detect_usm_allocation_type(input_ptr); - const auto is_usm_host = alloc_type == cldnn::allocation_type::usm_host; - bool can_share = is_usm_host && - !is_convert_required(user_tensor->get_element_type(), element_type) && + auto usm_host_tensor = std::dynamic_pointer_cast(user_tensor); + bool can_share = usm_host_tensor != nullptr && !is_convert_required(user_tensor->get_element_type(), element_type) && can_use_usm_host(m_graph->get_engine()); if (can_share) { - // For USM case we create host blob using custom USM host allocator - // and then create shared device blob on top of this buffer - return { create_shared_device_tensor(tensor_shape, element_type, input_ptr), user_tensor_wrapper.owner }; + return { usm_host_tensor->get_impl(), user_tensor_wrapper.owner }; } auto actual_memory_shape = tensor_shape; @@ -690,13 +692,17 @@ std::vector SyncInferRequest::prepare_batched_input(const std std::vector SyncInferRequest::prepare_input(const std::string& name, const ov::Output& port, const TensorWrapper& user_tensor_wrapper) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::prepare_input"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, openvino::itt::handle("SyncInferRequest::prepare_input: " + name)); auto pshape = port.get_partial_shape(); auto is_dynamic = pshape.is_dynamic(); auto user_tensor = user_tensor_wrapper.ptr; auto element_type = user_tensor->get_element_type(); + auto remote_ptr = std::dynamic_pointer_cast(user_tensor); + auto usm_host_ptr = std::dynamic_pointer_cast(user_tensor); bool is_remote = remote_ptr != nullptr; + bool is_usm_host_tensor = usm_host_ptr != nullptr; + GPU_DEBUG_TRACE_DETAIL << "Prepare input for " << name << " ( is_remote ? " << is_remote << ")" << std::endl; GPU_DEBUG_TRACE_DETAIL << " port shape : " << pshape.to_string() << std::endl; GPU_DEBUG_TRACE_DETAIL << " user_tensor shape: " << user_tensor->get_shape().to_string() << std::endl; @@ -714,12 +720,16 @@ std::vector SyncInferRequest::prepare_input(const std::string user_tensor->get_shape(), ") are incompatible"); + auto device_tensor_et = convert_to_supported_device_type(element_type); + bool convert_needed = is_convert_required(element_type, device_tensor_et); + if (is_remote) { m_plugin_inputs[name] = user_tensor_wrapper; + } else if (is_usm_host_tensor && !convert_needed) { + m_plugin_inputs[name] = {usm_host_ptr->get_impl(), user_tensor_wrapper.owner}; + is_remote = true; } - auto device_tensor_et = convert_to_supported_device_type(element_type); - bool convert_needed = is_convert_required(element_type, device_tensor_et); bool update_device_tensor = m_plugin_inputs.count(name) == 0 || (m_plugin_inputs[name].owner == TensorOwner::USER && !is_remote); if (update_device_tensor) { @@ -781,6 +791,7 @@ std::vector SyncInferRequest::prepare_input(const std::string } } + GPU_DEBUG_TRACE_DETAIL << name << " prepare input: " << memory->buffer_ptr() << std::endl; const cldnn::primitive_id internal_name = "parameter:" + name; network->set_input_data(internal_name, memory); @@ -840,6 +851,7 @@ std::vector SyncInferRequest::prepare_output(const std::strin auto output_tensor = std::dynamic_pointer_cast(m_plugin_outputs.at(name).ptr); auto output_memory = output_tensor->get_memory(); + GPU_DEBUG_TRACE_DETAIL << name << " prepare output: " << output_memory->buffer_ptr() << std::endl; return network->set_output_memory(internal_name, output_memory); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index a1c4d60b81977c..0ff0e1fd0bf258 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -3,16 +3,19 @@ // #include "convert_fc_to_compressed.hpp" +#include #include "intel_gpu/op/fully_connected.hpp" #include "intel_gpu/op/fully_connected_compressed.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/subtract.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/reshape.hpp" #include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/pass/pattern/op/or.hpp" #include "transformations/utils/utils.hpp" @@ -23,7 +26,19 @@ namespace intel_gpu { ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() { using namespace ov::pass::pattern; - auto weights_m = wrap_type(consumers_count(1)); + auto compressed_constant = [](const ov::Output& output) { + return (output.get_element_type() == ov::element::u8 || + output.get_element_type() == ov::element::i8) && + output.get_target_inputs().size() == 1; + }; + + auto reshape_3d_to_2d = [](const ov::Output& output) { + auto in_ps = output.get_node()->get_input_partial_shape(0); + auto out_ps = output.get_node()->get_output_partial_shape(0); + return in_ps.rank().is_static() && out_ps.rank().is_static() && in_ps.size() == 3 && out_ps.size() == 2; + }; + + auto weights_m = wrap_type(compressed_constant); auto convert_m = wrap_type({weights_m}); auto sub_const_m = wrap_type(consumers_count(1)); @@ -34,11 +49,15 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto mul_no_sub_m = wrap_type({convert_m, mul_const_m}); auto mul_m = std::make_shared(OutputVector{mul_with_sub_m, mul_no_sub_m}); + auto reshape_const_m = wrap_type(); + auto reshape_m = wrap_type({mul_m, reshape_const_m}, reshape_3d_to_2d); + + auto transpose_input = std::make_shared(OutputVector{reshape_m, mul_m}); auto transpose_const_m = wrap_type(); - auto transpose_m = wrap_type({mul_m, transpose_const_m}); - auto weights_input_m = std::make_shared(ov::OutputVector{mul_m, transpose_m}); + auto transpose_m = wrap_type({transpose_input, transpose_const_m}); auto data_m = any_input(); + auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m, mul_m}); auto fully_connected_m = wrap_type({data_m, weights_input_m}); ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { @@ -52,53 +71,73 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon return false; } + bool has_transpose = pattern_map.count(transpose_m); + auto scale_shape = pattern_map.at(mul_const_m).get_shape(); + bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { return d > 1; }) > 1; + + auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr node) { + auto constant = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(constant != nullptr); + ov::Shape current_shape = constant->get_shape(); + if (current_shape.size() == 2) + return constant; + OPENVINO_ASSERT(current_shape.size() == 3); + + auto new_shape = (has_transpose || !grouped) ? ov::Shape{current_shape[0] * current_shape[1], current_shape[2]} + : ov::Shape{current_shape[0], current_shape[1] * current_shape[2]}; + + return std::make_shared(*constant, new_shape); + }; + const auto& fc_input_a = fc->get_input_node_shared_ptr(0); - const auto& scale = pattern_map.at(mul_const_m).get_node_shared_ptr(); + const auto& scale = reshape_const_to_2d(pattern_map.at(mul_const_m).get_node_shared_ptr()); std::shared_ptr optional_zero_point = nullptr; - ov::NodeVector nodes_to_copy_info{pattern_map.at(fully_connected_m).get_node_shared_ptr(), - pattern_map.at(convert_m).get_node_shared_ptr()}; - if (pattern_map.count(mul_no_sub_m)) { - nodes_to_copy_info.push_back(pattern_map.at(mul_no_sub_m).get_node_shared_ptr()); - } - if (pattern_map.count(mul_with_sub_m)) { - nodes_to_copy_info.push_back(pattern_map.at(mul_with_sub_m).get_node_shared_ptr()); - } - const bool with_zero_point = pattern_map.count(subtract_m) > 0; if (with_zero_point) { - optional_zero_point = pattern_map.at(sub_const_m).get_node_shared_ptr(); - nodes_to_copy_info.push_back(subtract_m); + optional_zero_point = reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr()); } - std::shared_ptr fc_input_b = pattern_map.at(weights_m).get_node_shared_ptr(); - if (pattern_map.count(transpose_m)) { + std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr()); + std::shared_ptr fc_input_scale = scale; + std::shared_ptr fc_input_zp = optional_zero_point; + if (has_transpose) { const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); - const auto& transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); + std::shared_ptr transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); + if (ov::shape_size(transpose_const->get_shape()) != fc_input_b->get_output_partial_shape(0).size()) { + std::vector new_order(fc_input_b->get_output_partial_shape(0).size()); + std::iota(new_order.begin(), new_order.end(), 0); + std::swap(new_order[new_order.size() - 1], new_order[new_order.size() - 2]); + transpose_const = std::make_shared(ov::element::i32, ov::Shape{new_order.size()}, new_order); + } + fc_input_b = transpose->clone_with_new_inputs({ fc_input_b->output(0), transpose_const }); + fc_input_scale = transpose->clone_with_new_inputs({ scale->output(0), transpose_const }); + if (with_zero_point) + fc_input_zp = transpose->clone_with_new_inputs({ optional_zero_point->output(0), transpose_const }); } std::shared_ptr new_fc = nullptr; if (with_zero_point) { new_fc = std::make_shared(fc_input_a, fc_input_b, - scale, - optional_zero_point, + fc_input_scale, + fc_input_zp, fc->get_output_type()); } else { new_fc = std::make_shared(fc_input_a, fc_input_b, - scale, + fc_input_scale, fc->get_output_type()); } new_fc->set_friendly_name(fc->get_friendly_name()); - ov::copy_runtime_info(nodes_to_copy_info, new_fc); + ov::copy_runtime_info(m.get_matched_nodes(), new_fc); ov::replace_node(fc, new_fc); return true; }; - auto m = std::make_shared(fully_connected_m); + auto m = std::make_shared(fully_connected_m, "ConvertFullyConnectedToFullyConnectedCompressed"); this->register_matcher(m, callback); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp index a30c88e7d1492d..2caf3cd4d69850 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp @@ -160,7 +160,7 @@ ConvertMatMulToFullyConnected::ConvertMatMulToFullyConnected() { return true; }; - auto m = std::make_shared(matmul_m); + auto m = std::make_shared(matmul_m, "ConvertMatMulToFullyConnected"); this->register_matcher(m, callback); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 14122656fc5145..10275dae95d729 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -102,6 +102,7 @@ #include "transformations/op_conversions/convert_prior_box_v8_to_v0.hpp" #include "transformations/op_conversions/convert_shapeof3.hpp" #include "transformations/op_conversions/convert_topk11_downgrade.hpp" +#include "transformations/op_conversions/group_normalization_decomposition.hpp" #include "transformations/op_conversions/eye_decomposition.hpp" #include "transformations/op_conversions/convert_pad12_downgrade.hpp" #include "transformations/convert_precision.hpp" @@ -488,6 +489,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); pass_config->disable(); + pass_config->disable(); pass_config->enable(); diff --git a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp new file mode 100644 index 00000000000000..bcb0877b521f20 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/usm_host_tensor.hpp" +#include "intel_gpu/plugin/remote_tensor.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include + +namespace ov { +namespace intel_gpu { + +USMHostTensor::USMHostTensor(std::shared_ptr context, const element::Type element_type, const Shape& shape) + : m_impl(std::make_shared(context, shape, element_type, TensorType::BT_USM_HOST_INTERNAL)) {} + +USMHostTensor::USMHostTensor(std::shared_ptr tensor) + : m_impl(tensor) {} + +void* USMHostTensor::data(const element::Type& element_type) const { + return m_impl->get_original_memory()->buffer_ptr(); +} + +const element::Type& USMHostTensor::get_element_type() const { + return m_impl->get_element_type(); +} + +const Shape& USMHostTensor::get_shape() const { + return m_impl->get_shape(); +} + +const Strides& USMHostTensor::get_strides() const { + return m_impl->get_strides(); +} + +void USMHostTensor::set_shape(ov::Shape new_shape) { + m_impl->set_shape(new_shape); +} + +void USMHostTensor::set_memory(std::shared_ptr tensor) { + OPENVINO_ASSERT(tensor->get_original_memory()->get_allocation_type() == cldnn::allocation_type::usm_host, "[GPU] Unexpected allocation type"); + m_impl = tensor; +} + +std::shared_ptr USMHostTensor::get_impl() const { + return m_impl; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index e1375ef14ddb47..8bcb853a4c3090 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -48,6 +48,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), std::make_tuple(ov::hint::num_requests, 0), + std::make_tuple(ov::hint::enable_cpu_pinning, false), std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), @@ -86,7 +87,7 @@ void ExecutionConfig::set_property(const AnyMap& config) { for (auto& kv : config) { auto& name = kv.first; auto& val = kv.second; - OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attepmpt to set property ", name, " (", val.as(), ") which was not registered!\n"); + OPENVINO_ASSERT(is_supported(kv.first), "[GPU] Attempt to set property ", name, " (", val.as(), ") which was not registered!\n"); OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": ", val.as()); internal_properties[name] = val; } @@ -108,7 +109,7 @@ void ExecutionConfig::set_user_property(const AnyMap& config) { auto& name = kv.first; auto& val = kv.second; bool supported = is_supported(name) && supported_properties.at(name) == PropertyVisibility::PUBLIC; - OPENVINO_ASSERT(supported, "[GPU] Attepmpt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); + OPENVINO_ASSERT(supported, "[GPU] Attempt to set user property ", name, " (", val.as(), ") which was not registered or internal!\n"); OPENVINO_ASSERT(property_validators.at(name)->is_valid(val), "[GPU] Invalid value for property ", name, ": `", val.as(), "`"); user_properties[kv.first] = kv.second; diff --git a/src/plugins/intel_gpu/src/runtime/format.cpp b/src/plugins/intel_gpu/src/runtime/format.cpp index bd31583493ea71..095bda14bbf97d 100644 --- a/src/plugins/intel_gpu/src/runtime/format.cpp +++ b/src/plugins/intel_gpu/src/runtime/format.cpp @@ -85,6 +85,7 @@ static const std::map format_traits_map { FMT_TRAITS(oizyx, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {}), FMT_TRAITS(iozyx, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "oixyz", {}), FMT_TRAITS(os_is_yx_isv16_osv16, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 16}, {0, 16}}), + FMT_TRAITS(o_is_yx_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 2}}), FMT_TRAITS(o_is_yx_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 4}}), FMT_TRAITS(o_is_yx_isv16, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 16}}), FMT_TRAITS(o_is_zyx_isv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 16}}), diff --git a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp index 2d398ee89ff1de..1ff00c905bd073 100644 --- a/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp +++ b/src/plugins/intel_gpu/src/runtime/shape_predictor.cpp @@ -58,7 +58,7 @@ bool ShapePredictor::can_preallocate(size_t desired_buffer_size) { std::pair ShapePredictor::predict_preallocation_shape(const std::string& id, const ov::Shape& current_shape, - size_t dt_size, + size_t dt_bitwidth, bool can_reuse_buffer) { add_shape(id, current_shape); @@ -110,7 +110,7 @@ std::pair ShapePredictor::predict_preallocation_shape(const std for (size_t i = 0; i < current_shape.size(); ++i) single_iter_shape.push_back(diffs[0][i] == 0 ? current_shape[i] : 1); - if (ov::shape_size(single_iter_shape) * dt_size > _max_per_iter_size) + if (ceil_div(ov::shape_size(single_iter_shape) * dt_bitwidth, 8) > _max_per_iter_size) can_use_iterations_preallocation = false; } diff --git a/src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp b/src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp new file mode 100644 index 00000000000000..dea703cf7104b2 --- /dev/null +++ b/src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "openvino/core/dimension.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/concat.hpp" + +namespace tests { + +inline std::shared_ptr make_llm_kv_cache_pattern(ov::Dimension batch = ov::Dimension::dynamic(), + ov::Dimension n_heads = ov::Dimension::dynamic(), + ov::Dimension n_features = ov::Dimension::dynamic(), + ov::element::Type_t element_type = ov::element::f32) { + ov::PartialShape kv_cache_size = {batch, n_heads, -1, n_features}; + ov::PartialShape new_token_size = {batch, -1, n_heads, n_features}; + ov::PartialShape matmul_in_size = {batch, n_heads, -1, -1}; + + auto in_kv_prev = std::make_shared(element_type, kv_cache_size); + in_kv_prev->set_friendly_name("past_key_values"); + auto in_new_token = std::make_shared(element_type, new_token_size); + in_new_token->set_friendly_name("new_token_input"); + auto in_matmul = std::make_shared(element_type, matmul_in_size); + in_matmul->set_friendly_name("in_matmul"); + + auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, {new_token_size.size()}, {0, 2, 1, 3}); + auto transpose = std::make_shared(in_new_token, transpose_const); + auto concat = std::make_shared(ov::OutputVector{in_kv_prev, transpose}, 2); + auto convert = std::make_shared(concat, element_type); + auto kv_present = std::make_shared(convert); + kv_present->set_friendly_name("present_key_values"); + auto matmul = std::make_shared(in_matmul, concat, false, false); + auto matmul_out = std::make_shared(matmul); + matmul_out->set_friendly_name("matmul_out"); + + ov::ParameterVector params{in_kv_prev, in_new_token, in_matmul}; + ov::ResultVector results{kv_present, matmul_out}; + return std::make_shared(results, params, "LLM-KV-Cache"); +} + +} // namespace tests diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp new file mode 100644 index 00000000000000..4bcef9a7bedbe0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include "behavior/ov_infer_request/iteration_chaining.hpp" +#include "common_test_utils/test_constants.hpp" +#include "openvino/runtime/properties.hpp" + +using namespace ov::test::behavior; + +namespace { + +const std::vector configs = { + { ov::hint::inference_precision(ov::element::f32) } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVIterationChaining, + ::testing::Combine( + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::ValuesIn(configs)), + OVIterationChaining::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp index 31c4d4884f05d1..39711f74105596 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -42,6 +42,7 @@ const std::vector gpu_setcore_properties = { const std::vector gpu_compileModel_properties = { {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY), ov::hint::num_requests(10), + ov::hint::enable_cpu_pinning(true), ov::enable_profiling(true)}}; INSTANTIATE_TEST_SUITE_P(smoke_gpuCompileModelBehaviorTests, diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/broadcast.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/broadcast.cpp index 84234e94bbd347..3387375a0c07c1 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/broadcast.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/broadcast.cpp @@ -4,25 +4,24 @@ #include -#include "single_layer_tests/broadcast.hpp" +#include "single_op_tests/broadcast.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { - -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8 +using ov::test::BroadcastLayerTest; +using ov::test::BroadcastParamsTuple; +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i32, + ov::element::i8, + ov::element::u8 }; -const std::vector inputTPrecisions = { - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::BOOL +const std::vector inputTPrecisions = { + ov::element::f16, + ov::element::i16, + ov::element::boolean }; // NUMPY MODE ////////////////////////////////////////// @@ -31,12 +30,16 @@ std::vector> targetShapesNumpy0D = { {}, }; +std::vector> input_shapes_0d_static = { + {{}} +}; + INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast0D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesNumpy0D), - ::testing::Values(ngraph::AxisSet{}), // not used in numpy mode - ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{}), + ::testing::Values(ov::AxisSet{}), // not used in numpy mode + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_0d_static)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); @@ -53,12 +56,16 @@ std::vector> targetShapesNumpy1D = { {1, 4, 4}, }; +std::vector> input_shapes_1d_static = { + {{1}} +}; + INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast1D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesNumpy1D), - ::testing::Values(ngraph::AxisSet{}), // not used in numpy mode - ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{1}), + ::testing::Values(ov::AxisSet{}), // not used in numpy mode + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_1d_static)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); @@ -66,9 +73,9 @@ INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast1D, INSTANTIATE_TEST_CASE_P(smoke_PrecTransformation, BroadcastLayerTest, ::testing::Combine( ::testing::Values(targetShapesNumpy1D[0]), - ::testing::Values(ngraph::AxisSet{}), //not used in numpy mode - ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{1}), + ::testing::Values(ov::AxisSet{}), //not used in numpy mode + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_1d_static)), ::testing::ValuesIn(inputTPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); @@ -81,12 +88,16 @@ std::vector> targetShapesNumpy2D = { {2, 2, 3, 6}, }; +std::vector> input_shapes_2d_static = { + {{3, 1}} +}; + INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast2D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesNumpy2D), ::testing::Values(ngraph::AxisSet{}), // not used in numpy mode ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{3, 1}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_2d_static)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); @@ -99,42 +110,63 @@ std::vector> targetShapesNumpy3D = { {2, 1, 1, 4, 4}, }; +std::vector> input_shapes_3d_static = { + {{1, 4, 1}} +}; + + INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast3D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesNumpy3D), - ::testing::Values(ngraph::AxisSet{}), // not used in numpy mode - ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{1, 4, 1}), + ::testing::Values(ov::AxisSet{}), // not used in numpy mode + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_3d_static)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); +std::vector> targetShapesNumpy6D = { + {1, 2, 3, 4, 5, 6}, +}; + +std::vector> input_shapes_6d_static = { + {{1, 2, 1, 4, 1, 6}} +}; + INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast6D, BroadcastLayerTest, - ::testing::Combine(::testing::Values(std::vector{1, 2, 3, 4, 5, 6}), - ::testing::Values(ngraph::AxisSet{}), // not used in numpy mode - ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{1, 2, 1, 4, 1, 6}), + ::testing::Combine(::testing::ValuesIn(targetShapesNumpy6D), + ::testing::Values(ov::AxisSet{}), // not used in numpy mode + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_6d_static)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); +std::vector> targetShapesNumpy5D = { + {1, 2, 3, 4, 5}, +}; + +std::vector> input_shapes_5d_static = { + {{1, 2, 1, 4, 1}} +}; + INSTANTIATE_TEST_CASE_P(smoke_TestNumpyBroadcast5D, BroadcastLayerTest, - ::testing::Combine(::testing::Values(std::vector{1, 2, 3, 4, 5}), - ::testing::Values(ngraph::AxisSet{}), // not used in numpy mode - ::testing::Values(ngraph::op::BroadcastType::NUMPY), - ::testing::Values(std::vector{1, 2, 1, 4, 1}), + ::testing::Combine(::testing::ValuesIn(targetShapesNumpy5D), + ::testing::Values(ov::AxisSet{}), // not used in numpy mode + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_5d_static)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); // END NUMPY MODE ////////////////////////////////////// // BIDIRECTIONAL MODE ////////////////////////////////// -std::vector> inShapesBidi = { - {4, 1}, - {1, 4, 1}, - {4, 1, 1} +std::vector> inShapesBidi = { + {{4, 1}}, + {{1, 4, 1}}, + {{4, 1, 1}} }; std::vector> targetShapesBidi = { @@ -146,25 +178,25 @@ std::vector> targetShapesBidi = { INSTANTIATE_TEST_CASE_P(smoke_TestBidirectionalBroadcast, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesBidi), - ::testing::Values(ngraph::AxisSet{}), // not used in bidirectional mode - ::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL), - ::testing::ValuesIn(inShapesBidi), + ::testing::Values(ov::AxisSet{}), // not used in bidirectional mode + ::testing::Values(ov::op::BroadcastType::BIDIRECTIONAL), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapesBidi)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); // EXPLICIT MODE /////////////////////////////////////// // 1D -std::vector> inShapesExplicit1D = { {4} }; +std::vector> inShapesExplicit1D = { {{4}} }; std::vector> targetShapesExplicit1D = { {4, 2, 4}, {4, 2, 4, 1} }; -std::vector axes1D = { {0}, {2} }; +std::vector axes1D = { {0}, {2} }; INSTANTIATE_TEST_CASE_P(smoke_TestExplicitBroadcast1D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesExplicit1D), ::testing::ValuesIn(axes1D), - ::testing::Values(ngraph::op::BroadcastType::EXPLICIT), - ::testing::ValuesIn(inShapesExplicit1D), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapesExplicit1D)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); @@ -172,18 +204,18 @@ INSTANTIATE_TEST_CASE_P(smoke_TestExplicitBroadcast1D, INSTANTIATE_TEST_SUITE_P(smoke_TestBidirectionalBroadcast3, BroadcastLayerTest, ::testing::Combine(::testing::Values(targetShapesBidi[2]), - ::testing::Values(ngraph::AxisSet{}), // not used in bidirectional mode - ::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL), - ::testing::Values(inShapesBidi[2]), + ::testing::Values(ov::AxisSet{}), // not used in bidirectional mode + ::testing::Values(ov::op::BroadcastType::BIDIRECTIONAL), + ::testing::Values(ov::test::static_shapes_to_test_representation(inShapesBidi[2])), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); // EXPLICIT MODE -std::vector> inShapesExplicit = { - {3, 1}, - {2, 4} +std::vector> inShapesExplicit = { + {{3, 1}}, + {{2, 4}} }; std::vector> targetShapesExplicit = { @@ -192,34 +224,33 @@ std::vector> targetShapesExplicit = { }; // 2D -std::vector> inShapesExplicit2D = { {2, 4} }; +std::vector> inShapesExplicit2D = { {{2, 4}} }; std::vector> targetShapesExplicit2D = { {2, 2, 4}, {2, 2, 4, 1}}; -std::vector axes2D = { {1, 2}, {0, 2} }; +std::vector axes2D = { {1, 2}, {0, 2} }; INSTANTIATE_TEST_CASE_P(smoke_TestExplicitBroadcast2D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesExplicit2D), ::testing::ValuesIn(axes2D), - ::testing::Values(ngraph::op::BroadcastType::EXPLICIT), - ::testing::ValuesIn(inShapesExplicit2D), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapesExplicit2D)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); // 3D -std::vector> inShapesExplicit3D = { {2, 2, 2} }; +std::vector> inShapesExplicit3D = { {{2, 2, 2}} }; std::vector> targetShapesExplicit3D = { {2, 2, 2, 2} }; -std::vector axes3D = { {0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3} }; +std::vector axes3D = { {0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3} }; INSTANTIATE_TEST_CASE_P(smoke_TestExplicitBroadcast3D, BroadcastLayerTest, ::testing::Combine(::testing::ValuesIn(targetShapesExplicit3D), ::testing::ValuesIn(axes3D), - ::testing::Values(ngraph::op::BroadcastType::EXPLICIT), - ::testing::ValuesIn(inShapesExplicit3D), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapesExplicit3D)), ::testing::ValuesIn(inputPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), BroadcastLayerTest::getTestCaseName); // END EXPLICIT MODE /////////////////////////////////// - } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/bucketize.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/bucketize.cpp index b658cfc840fe06..89da385392745f 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/bucketize.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/bucketize.cpp @@ -2,31 +2,37 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/bucketize.hpp" +#include "single_op_tests/bucketize.hpp" #include -using namespace LayerTestsDefinitions; +using ov::test::BucketizeLayerTest; namespace { -const std::vector> data_shapes = { - // No reason to test other ranks as logic is the same - {40, 22, 13, 9}, // 4D - {6, 7, 3, 2, 8}, // 5D - {6, 7, 3, 2, 8, 5}, // 6D -}; - -const std::vector> buckets_shapes = { - {5}, - {100}, +const std::vector> input_shapes_static = { + {{40, 22, 13, 9}, {5}}, + {{6, 7, 3, 2, 8}, {5}}, + {{6, 7, 3, 2, 8, 5}, {5}}, + {{40, 22, 13, 9}, {100}}, + {{6, 7, 3, 2, 8}, {100}}, + {{6, 7, 3, 2, 8, 5}, {100}}, }; const std::vector with_right_bound = {true, false}; -const std::vector out_precision = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, +const std::vector out_precision = { + ov::element::i32, + ov::element::i64 +}; + +const std::vector in_buckets_precision = { + ov::element::f16, + ov::element::f32, + ov::element::i32, + ov::element::i64, + ov::element::i8, + ov::element::u8 }; // We won't test FP32 and FP16 together as it won't make sense for now @@ -34,94 +40,68 @@ const std::vector out_precision = { INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_input_fp16, BucketizeLayerTest, - testing::Combine(testing::ValuesIn(data_shapes), - testing::ValuesIn(buckets_shapes), + testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), testing::ValuesIn(with_right_bound), - testing::Values(InferenceEngine::Precision::FP16), - testing::Values(InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8), + testing::Values(ov::element::f16), + testing::Values(ov::element::f16, + ov::element::i32, + ov::element::i64, + ov::element::i8, + ov::element::u8), testing::ValuesIn(out_precision), testing::Values(ov::test::utils::DEVICE_GPU)), BucketizeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_input_fp32, BucketizeLayerTest, - testing::Combine(testing::ValuesIn(data_shapes), - testing::ValuesIn(buckets_shapes), + testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), testing::ValuesIn(with_right_bound), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8), + testing::Values(ov::element::f32), + testing::Values(ov::element::f32, + ov::element::i32, + ov::element::i64, + ov::element::i8, + ov::element::u8), testing::ValuesIn(out_precision), testing::Values(ov::test::utils::DEVICE_GPU)), BucketizeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_input_i32, BucketizeLayerTest, - testing::Combine(testing::ValuesIn(data_shapes), - testing::ValuesIn(buckets_shapes), + testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), testing::ValuesIn(with_right_bound), - testing::Values(InferenceEngine::Precision::I32), - testing::Values(InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8), + testing::Values(ov::element::i32), + testing::ValuesIn(in_buckets_precision), testing::ValuesIn(out_precision), testing::Values(ov::test::utils::DEVICE_GPU)), BucketizeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_input_i64, BucketizeLayerTest, - testing::Combine(testing::ValuesIn(data_shapes), - testing::ValuesIn(buckets_shapes), + testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), testing::ValuesIn(with_right_bound), - testing::Values(InferenceEngine::Precision::I64), - testing::Values(InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8), + testing::Values(ov::element::i64), + testing::ValuesIn(in_buckets_precision), testing::ValuesIn(out_precision), testing::Values(ov::test::utils::DEVICE_GPU)), BucketizeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_input_i8, BucketizeLayerTest, - testing::Combine(testing::ValuesIn(data_shapes), - testing::ValuesIn(buckets_shapes), + testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), testing::ValuesIn(with_right_bound), - testing::Values(InferenceEngine::Precision::I8), - testing::Values(InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8), + testing::Values(ov::element::i8), + testing::ValuesIn(in_buckets_precision), testing::ValuesIn(out_precision), testing::Values(ov::test::utils::DEVICE_GPU)), BucketizeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Bucketize_input_u8, BucketizeLayerTest, - testing::Combine(testing::ValuesIn(data_shapes), - testing::ValuesIn(buckets_shapes), + testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), testing::ValuesIn(with_right_bound), - testing::Values(InferenceEngine::Precision::U8), - testing::Values(InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8), + testing::Values(ov::element::u8), + testing::ValuesIn(in_buckets_precision), testing::ValuesIn(out_precision), testing::Values(ov::test::utils::DEVICE_GPU)), BucketizeLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp index 0d0aa1fe704b77..a6dfee6b36d641 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/concat.cpp @@ -4,35 +4,30 @@ #include -#include "single_layer_tests/concat.hpp" +#include "single_op_tests/concat.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::ConcatLayerTest; std::vector axes = {-3, -2, -1, 0, 1, 2, 3}; -std::vector>> inShapes = { +std::vector> inShapes = { {{10, 10, 10, 10}}, {{10, 10, 10, 10}, {10, 10, 10, 10}}, {{10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}}, {{10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}}, {{10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}} }; -std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64}; +std::vector netPrecisions = {ov::element::f32, + ov::element::f16, + ov::element::i64}; INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, ConcatLayerTest, ::testing::Combine( ::testing::ValuesIn(axes), - ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapes)), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConcatLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp index 2206490567e7e2..b7c2807ce37086 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder.cpp @@ -3,35 +3,33 @@ // #include -#include "single_layer_tests/ctc_greedy_decoder.hpp" +#include "single_op_tests/ctc_greedy_decoder.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::helpers; - namespace { +using ov::test::CTCGreedyDecoderLayerTest; + // Common params -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; std::vector mergeRepeated{true, false}; +std::vector> input_shapes_static = { + {{ 50, 3, 3 }}, + {{ 50, 3, 7 }}, + {{ 50, 3, 8 }}, + {{ 50, 3, 16 }}, + {{ 50, 3, 128 }}, + {{ 50, 3, 49 }}, + {{ 50, 3, 55 }}, + {{ 1, 1, 16 }}}; + INSTANTIATE_TEST_SUITE_P(smoke_CtcGreedyDecoderBasic, CTCGreedyDecoderLayerTest, ::testing::Combine(::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({50, 3, 3}), - std::vector({50, 3, 7}), - std::vector({50, 3, 8}), - std::vector({50, 3, 16}), - std::vector({50, 3, 128}), - std::vector({50, 3, 49}), - std::vector({50, 3, 55}), - std::vector({1, 1, 16})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::ValuesIn(mergeRepeated), ::testing::Values(ov::test::utils::DEVICE_GPU)), CTCGreedyDecoderLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp index c015258d41ed24..12d318d107d342 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_greedy_decoder_seq_len.cpp @@ -3,30 +3,28 @@ // #include -#include "single_layer_tests/ctc_greedy_decoder_seq_len.hpp" +#include "single_op_tests/ctc_greedy_decoder_seq_len.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::helpers; - namespace { +using ov::test::CTCGreedyDecoderSeqLenLayerTest; -std::vector> inputShape{{1, 1, 1}, {1, 6, 10}, {3, 3, 16}, {5, 3, 55}}; +std::vector> inputShape{{{1, 1, 1}}, {{1, 6, 10}}, {{3, 3, 16}}, {{5, 3, 55}}}; -const std::vector probPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector probPrecisions = { + ov::element::f32, + ov::element::f16 }; -const std::vector idxPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64 +const std::vector idxPrecisions = { + ov::element::i32, + ov::element::i64 }; std::vector mergeRepeated{true, false}; INSTANTIATE_TEST_SUITE_P(smoke_set1, CTCGreedyDecoderSeqLenLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShape), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShape)), ::testing::Values(10), ::testing::ValuesIn(probPrecisions), ::testing::ValuesIn(idxPrecisions), @@ -37,8 +35,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_set1, INSTANTIATE_TEST_SUITE_P(smoke_set2, CTCGreedyDecoderSeqLenLayerTest, - ::testing::Combine(::testing::ValuesIn(std::vector>{{2, 8, 11}, - {4, 10, 55}}), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(std::vector>{{{2, 8, 11}}, + {{4, 10, 55}}})), ::testing::ValuesIn(std::vector{5, 100}), ::testing::ValuesIn(probPrecisions), ::testing::ValuesIn(idxPrecisions), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_loss.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_loss.cpp index 9e71f45de30034..742b4974e6fc88 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_loss.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/ctc_loss.cpp @@ -2,22 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/ctc_loss.hpp" +#include "single_op_tests/ctc_loss.hpp" #include -using namespace LayerTestsDefinitions; - namespace { +using ov::test::CTCLossLayerTest; -const std::vector fPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +const std::vector fPrecisions = { + ov::element::f32, + ov::element::f16, }; -const std::vector iPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, +const std::vector iPrecisions = { + ov::element::i32, + ov::element::i64, }; const std::vector preprocessCollapseRepeated = {true, false}; @@ -25,7 +24,6 @@ const std::vector ctcMergeRepeated = {true, false}; const std::vector unique = {true, false}; const auto ctcLossArgsSubset1 = testing::Combine( - testing::Values(std::vector({2, 3, 3})), // logits shape testing::ValuesIn(std::vector>({{2, 3}, {3, 3}})), // logits length testing::ValuesIn( std::vector>>({{{0, 1, 0}, {1, 0, 1}}, {{0, 1, 2}, {1, 1, 1}}})), // labels @@ -38,14 +36,14 @@ const auto ctcLossArgsSubset1 = testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_CTCLoss_Set1, CTCLossLayerTest, testing::Combine(ctcLossArgsSubset1, - testing::ValuesIn(fPrecisions), - testing::ValuesIn(iPrecisions), - testing::Values(ov::test::utils::DEVICE_GPU)), + testing::Values(ov::test::static_shapes_to_test_representation({{2, 3, 3}})), // logits shape + testing::ValuesIn(fPrecisions), + testing::ValuesIn(iPrecisions), + testing::Values(ov::test::utils::DEVICE_GPU)), CTCLossLayerTest::getTestCaseName); const auto ctcLossArgsSubset2 = - testing::Combine(testing::Values(std::vector({3, 6, 8})), // logits shape - testing::ValuesIn(std::vector>({{6, 5, 6}, {5, 5, 5}})), // logits length + testing::Combine(testing::ValuesIn(std::vector>({{6, 5, 6}, {5, 5, 5}})), // logits length testing::ValuesIn(std::vector>>( {{{4, 1, 2, 3, 4, 5}, {5, 4, 3, 0, 1, 0}, {2, 1, 3, 1, 3, 0}}, {{2, 1, 5, 3, 2, 6}, {3, 3, 3, 3, 3, 3}, {6, 5, 6, 5, 6, 5}}})), // labels @@ -58,8 +56,9 @@ const auto ctcLossArgsSubset2 = INSTANTIATE_TEST_SUITE_P(smoke_CTCLoss_Set2, CTCLossLayerTest, testing::Combine(ctcLossArgsSubset2, - testing::ValuesIn(fPrecisions), - testing::ValuesIn(iPrecisions), - testing::Values(ov::test::utils::DEVICE_GPU)), + testing::Values(ov::test::static_shapes_to_test_representation({{3, 6, 8}})), // logits shape + testing::ValuesIn(fPrecisions), + testing::ValuesIn(iPrecisions), + testing::Values(ov::test::utils::DEVICE_GPU)), CTCLossLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/einsum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/einsum.cpp index 94555588b727bb..7303e7a396a2b2 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/einsum.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/einsum.cpp @@ -4,35 +4,34 @@ #include -#include "single_layer_tests/einsum.hpp" - -using namespace ngraph::helpers; -using namespace LayerTestsDefinitions; +#include "single_op_tests/einsum.hpp" namespace { -const std::vector precisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +using ov::test::EinsumLayerTest; + +const std::vector model_types = { + ov::element::f32, + ov::element::f16 }; -const std::vector equationsWithInput = { - { "ij->ji", {{{1, 2}}} }, // transpose 2d - { "ijk->kij", { {1, 2, 3} } }, // transpose 3d - { "ij->i", { {2, 3} } }, // reduce - { "ab,cd->abcd", { { 1, 2}, {3, 4} } }, // no reduction - { "ab,bc->ac", { {2, 3}, {3, 2} } }, // matrix multiplication - { "ab,bcd,bc->ca", { {2, 4}, {4, 3, 1}, {4, 3} } }, // multiple multiplications - { "kii->ki", { {1, 3, 3} } }, // diagonal - { "abbac,bad->ad", { {2, 3, 3, 2, 4}, {3, 2, 1} } }, // diagonal and multiplication with repeated labels - { "a...->...a", { {2, 2, 3} } }, // transpose with ellipsis - { "a...->...", { {2, 2, 3} } }, // reduce with ellipsis - { "ab...,...->ab...", { {2, 2, 3}, {1} } }, // multiply by scalar - { "a...j,j...->a...", { {1, 1, 4, 3}, {3, 4, 2, 1} } } // complex multiplication +const std::vector equationsWithInput = { + { "ij->ji", ov::test::static_shapes_to_test_representation({ {1, 2} }) }, // transpose 2d + { "ijk->kij", ov::test::static_shapes_to_test_representation({ {1, 2, 3} }) }, // transpose 3d + { "ij->i", ov::test::static_shapes_to_test_representation({ {2, 3} }) }, // reduce + { "ab,cd->abcd", ov::test::static_shapes_to_test_representation({ { 1, 2}, {3, 4} }) }, // no reduction + { "ab,bc->ac", ov::test::static_shapes_to_test_representation({ {2, 3}, {3, 2} }) }, // matrix multiplication + { "ab,bcd,bc->ca", ov::test::static_shapes_to_test_representation({ {2, 4}, {4, 3, 1}, {4, 3} }) }, // multiple multiplications + { "kii->ki", ov::test::static_shapes_to_test_representation({ {1, 3, 3} }) }, // diagonal + { "abbac,bad->ad", ov::test::static_shapes_to_test_representation({ {2, 3, 3, 2, 4}, {3, 2, 1} }) }, // diagonal and multiplication with repeated labels + { "a...->...a", ov::test::static_shapes_to_test_representation({ {2, 2, 3} }) }, // transpose with ellipsis + { "a...->...", ov::test::static_shapes_to_test_representation({ {2, 2, 3} }) }, // reduce with ellipsis + { "ab...,...->ab...", ov::test::static_shapes_to_test_representation({ {2, 2, 3}, {1} }) }, // multiply by scalar + { "a...j,j...->a...", ov::test::static_shapes_to_test_representation({ {1, 1, 4, 3}, {3, 4, 2, 1} }) } // complex multiplication }; INSTANTIATE_TEST_SUITE_P(smoke_Einsum, EinsumLayerTest, - ::testing::Combine(::testing::ValuesIn(precisions), + ::testing::Combine(::testing::ValuesIn(model_types), ::testing::ValuesIn(equationsWithInput), ::testing::Values(ov::test::utils::DEVICE_GPU)), EinsumLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/eye.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eye.cpp similarity index 88% rename from src/plugins/intel_gpu/tests/functional/single_layer_tests/eye.cpp rename to src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eye.cpp index a347ab6877b16d..d610fc923d8e72 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/eye.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eye.cpp @@ -2,19 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "shared_test_classes/single_layer/eye.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/eye.hpp" namespace { +using ov::test::EyeLayerTest; -TEST_P(EyeLayerTest, CompareWithRefs) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - Run(); -} - -const std::vector netPrecisions = - {ElementType::f32, ElementType::f16, ElementType::i32, ElementType::i8, ElementType::u8, ElementType::i64}; +const std::vector model_types = { + ov::element::f32, + ov::element::f16, + ov::element::i32, + ov::element::i8, + ov::element::u8, + ov::element::i64}; const std::vector> eyePars = { // rows, cols, diag_shift @@ -40,7 +39,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Eye2D_WithNonScalar_Test, ::testing::Combine(::testing::ValuesIn(std::vector>{{{1}, {1}, {1}}}), ::testing::ValuesIn(emptyBatchShape), ::testing::ValuesIn(eyePars), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), EyeLayerTest::getTestCaseName); @@ -50,7 +49,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Eye_1DBatch_Test, {{1}, {1}, {1}, {1}}}), ::testing::ValuesIn(batchShapes1D), ::testing::ValuesIn(eyePars), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), EyeLayerTest::getTestCaseName); @@ -60,7 +59,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Eye_2DBatch_Test, {{1}, {1}, {1}, {2}}}), ::testing::ValuesIn(batchShapes2D), ::testing::ValuesIn(eyePars), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), EyeLayerTest::getTestCaseName); @@ -70,7 +69,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Eye_3DBatch_Test, {{1}, {1}, {1}, {3}}}), ::testing::ValuesIn(batchShapes3D), ::testing::ValuesIn(eyePars), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), EyeLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp new file mode 100644 index 00000000000000..72bb27eba05c54 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "single_layer_tests/group_normalization.hpp" + +using namespace ov::test::subgraph; + +namespace { + +const std::vector netPrecisions = { + ov::element::f16, + ov::element::f32, +}; + +const std::vector inputShapes = { + {3, 8, 32, 64}, + {3, 8, 28, 32, 12}, +}; + +const std::vector numGroups = { + 2, 4, +}; + +const std::vector epsilon = { + 0.0025 +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_GroupNormalization, + GroupNormalizationTest, + testing::Combine(testing::ValuesIn(netPrecisions), + ::testing::Values(ov::element::undefined), + ::testing::Values(ov::element::undefined), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes)), + testing::ValuesIn(numGroups), + testing::ValuesIn(epsilon), + testing::Values(ov::test::utils::DEVICE_GPU), + testing::Values(ov::AnyMap())), + GroupNormalizationTest::getTestCaseName); + +} // anonymous namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp index d60cbeb5875472..056a85a926aaa3 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/multiply_add.cpp @@ -2,32 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "subgraph_tests/multiply_add.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 -}; +const std::vector input_type = {ov::element::f32, ov::element::f16}; -const std::vector> inputShapes = { - {1, 3}, - {1, 3, 2}, - {1, 3, 2, 5}, - {1, 3, 2, 5, 4}, - {1, 3, 2, 2, 4, 5}, +const std::vector inputShapes = { + {1, 3}, + {1, 3, 2}, + {1, 3, 2, 5}, + {1, 3, 2, 5, 4}, + {1, 3, 2, 2, 4, 5}, }; -INSTANTIATE_TEST_SUITE_P(smoke_MultipleAdd_Nd, MultiplyAddLayerTest, - ::testing::Combine( - ::testing::ValuesIn(inputShapes), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - MultiplyAddLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_MultipleAdd_Nd, + MultiplyAddLayerTest, + ::testing::Combine(::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(input_type), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + MultiplyAddLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp index 105a37a30f2c37..5eda8a003d8971 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/perm_conv_perm_concat.cpp @@ -2,42 +2,41 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "subgraph_tests/perm_conv_perm_concat.hpp" -#include "common_test_utils/test_constants.hpp" + +#include + namespace { -std::vector> input_shapes { +std::vector input_shapes{ {1, 1, 7, 32}, {1, 1, 8, 16}, }; -std::vector> kernel_shapes { +std::vector kernel_shapes{ {1, 3}, {1, 5}, }; -std::vector output_channels { +std::vector output_channels{ 32, 64, }; -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, -// InferenceEngine::Precision::FP16, -}; +std::vector netPrecisions = {ov::element::f32}; -std::map additional_config = { -}; -} // namespace - -namespace SubgraphTestsDefinitions { - INSTANTIATE_TEST_SUITE_P(smoke_basic, PermConvPermConcat, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::ValuesIn(input_shapes), - ::testing::ValuesIn(kernel_shapes), - ::testing::ValuesIn(output_channels), - ::testing::Values(additional_config)), - PermConvPermConcat::getTestCaseName); -} // namespace SubgraphTestsDefinitions +ov::AnyMap additional_config = {}; +} // namespace + +namespace ov { +namespace test { +INSTANTIATE_TEST_SUITE_P(smoke_basic, + PermConvPermConcat, + ::testing::Combine(::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(kernel_shapes), + ::testing::ValuesIn(output_channels), + ::testing::Values(additional_config)), + PermConvPermConcat::getTestCaseName); +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp index 872140ceea815f..78cef86f3b0c09 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/reshape_squeeze_reshape_relu.cpp @@ -2,48 +2,48 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "subgraph_tests/reshape_squeeze_reshape_relu.hpp" + +#include + #include "common_test_utils/test_constants.hpp" -using namespace SubgraphTestsDefinitions; +using namespace ov::test; namespace { - std::vector inputs_squeeze { - {{1, 1, 3}, {0, 1}}, - {{1, 1, 3}, {1}}, - {{1, 3, 1}, {0, 2}}, - {{3, 1, 1}, {1}}, - {{1, 4, 1, 3}, {0, 2}}, - {{3, 1, 2, 4, 4, 3}, {1}}, - {{1, 1, 1, 1, 1, 3}, {0, 1, 2, 3, 4}}, - {{1}, {0}}, - }; - - std::vector inputs_unsqueeze{ - {{1}, {0}}, - {{1}, {0, 1}}, - {{1}, {0, 1, 2}}, - {{1, 2, 3}, {0}}, - {{1, 1, 3}, {1, 2}}, - {{1, 4, 1, 3}, {0, 2}}, - }; - - std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - }; - - - const std::vector opTypes = { - ngraph::helpers::SqueezeOpType::SQUEEZE, - ngraph::helpers::SqueezeOpType::UNSQUEEZE - }; - - INSTANTIATE_TEST_SUITE_P(smoke_reshape_squeeze_reshape_relu, ReshapeSqueezeReshapeRelu, - ::testing::Combine( - ::testing::ValuesIn(inputs_squeeze), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::ValuesIn(opTypes)), - ReshapeSqueezeReshapeRelu::getTestCaseName); +std::vector inputs_squeeze{ + {{1, 1, 3}, {0, 1}}, + {{1, 1, 3}, {1}}, + {{1, 3, 1}, {0, 2}}, + {{3, 1, 1}, {1}}, + {{1, 4, 1, 3}, {0, 2}}, + {{3, 1, 2, 4, 4, 3}, {1}}, + {{1, 1, 1, 1, 1, 3}, {0, 1, 2, 3, 4}}, + {{1}, {0}}, +}; + +std::vector inputs_unsqueeze{ + {{1}, {0}}, + {{1}, {0, 1}}, + {{1}, {0, 1, 2}}, + {{1, 2, 3}, {0}}, + {{1, 1, 3}, {1, 2}}, + {{1, 4, 1, 3}, {0, 2}}, +}; + +std::vector input_types = { + ov::element::f32, + ov::element::f16, +}; + +const std::vector opTypes = {ov::test::utils::SqueezeOpType::SQUEEZE, + ov::test::utils::SqueezeOpType::UNSQUEEZE}; + +INSTANTIATE_TEST_SUITE_P(smoke_reshape_squeeze_reshape_relu, + ReshapeSqueezeReshapeRelu, + ::testing::Combine(::testing::ValuesIn(inputs_squeeze), + ::testing::ValuesIn(input_types), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::ValuesIn(opTypes)), + ReshapeSqueezeReshapeRelu::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp index 3d5a1af905130e..5857c2188cfdb2 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/split_conv_concat.cpp @@ -2,26 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "subgraph_tests/split_conv_concat.hpp" -#include "common_test_utils/test_constants.hpp" -using namespace SubgraphTestsDefinitions; +#include + +using namespace ov::test; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 -}; +const std::vector input_types = {ov::element::f32, ov::element::f16}; -INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, SplitConvConcat, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(std::vector({1, 6, 40, 40})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - SplitConvConcat::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, + SplitConvConcat, + ::testing::Combine(::testing::ValuesIn(input_types), + ::testing::Values(ov::Shape{1, 6, 40, 40}), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + SplitConvConcat::getTestCaseName); } // namespace - diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp new file mode 100644 index 00000000000000..a32e97d8e8e0fc --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -0,0 +1,217 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/preprocess/pre_post_process.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/transpose.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "shared_test_classes/base/utils/compare_results.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "subgraphs_builders.hpp" + +using namespace ov::test; + +namespace SubgraphTestsDefinitions { + +using KVCacheTestParams = std::tuple, // input shapes + ov::element::Type, // in/out precision + std::map>; // additional config + +class KVCacheTest : public testing::WithParamInterface, public SubgraphBaseTest { +public: + static std::string get_test_case_name(testing::TestParamInfo obj) { + std::vector input_shapes; + ov::element::Type element_type; + std::map additional_config; + + std::tie(input_shapes, element_type, additional_config) = obj.param; + + std::ostringstream result; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << "TS="; + for (const auto& shape : input_shapes) { + result << "("; + if (!shape.second.empty()) { + auto itr = shape.second.begin(); + do { + result << ov::test::utils::vec2str(*itr); + } while (++itr != shape.second.end() && result << "_"); + } + result << ")_"; + } + result << "precision=" << element_type << "_"; + result << "config=("; + for (const auto& configEntry : additional_config) { + result << configEntry.first << ", " << configEntry.second << ":"; + } + result << ")"; + + return result.str(); + } + +protected: + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector input_shapes; + ov::element::Type element_type; + std::map additional_config; + + std::tie(input_shapes, element_type, additional_config) = GetParam(); + + configuration.insert(additional_config.begin(), additional_config.end()); + init_input_shapes(input_shapes); + + inType = outType = element_type; + + function = tests::make_llm_kv_cache_pattern(inputDynamicShapes[0][0], inputDynamicShapes[0][1], inputDynamicShapes[0][3], element_type); + } +}; + +TEST_P(KVCacheTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +namespace { + +const std::vector precisions = {ov::element::f32, ov::element::f16}; + +const std::vector> input_shapes_basic = { + { + {{-1, 32, -1, 80}, { {1, 32, 0, 80}, {1, 32, 20, 80} }}, + {{-1, -1, 32, 80}, { {1, 20, 32, 80}, {1, 1, 32, 80} }}, + {{-1, 32, -1, -1}, { {1, 32, 1, 20}, {1, 32, 1, 21} }} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_GPU_Dynamic, + KVCacheTest, + ::testing::Combine(::testing::ValuesIn(input_shapes_basic), + ::testing::ValuesIn(precisions), + ::testing::Values(std::map())), + KVCacheTest::get_test_case_name); +} // namespace + +TEST(KVCacheTest, smoke_multipleIterations) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto core = ov::Core(); + + const size_t batch = 1; + const size_t n_heads = 32; + const size_t n_features = 80; + const size_t context_size = 20; + size_t cache_size = 0; + + ov::element::Type element_type = ov::element::f16; + + auto model = tests::make_llm_kv_cache_pattern(batch, n_heads, n_features, element_type); + auto compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f16)); + + auto input0 = model->get_parameters().at(0); + auto input1 = model->get_parameters().at(1); + auto input2 = model->get_parameters().at(2); + auto output0 = model->get_results().at(0); + auto output1 = model->get_results().at(1); + + auto infer_request = compiled_model.create_infer_request(); + auto input0_tensor_remote_io = infer_request.get_tensor(input0); + auto input1_tensor_remote_io = infer_request.get_tensor(input1); + auto input2_tensor_remote_io = infer_request.get_tensor(input2); + auto output0_tensor_remote_io = infer_request.get_tensor(output0); + auto output1_tensor_remote_io = infer_request.get_tensor(output1); + + auto compare_tensors = [&model](const std::vector expected, const std::vector& actual) { + ASSERT_EQ(expected.size(), actual.size()); + ASSERT_EQ(expected.size(), model->get_results().size()); + auto compareMap = ov::test::utils::getCompareMap(); + const auto& results = model->get_results(); + for (size_t j = 0; j < results.size(); j++) { + const auto result = results[j]; + for (size_t i = 0; i < result->get_input_size(); ++i) { + std::shared_ptr inputNode = result->get_input_node_shared_ptr(i); + if (std::dynamic_pointer_cast(inputNode)) { + std::shared_ptr nextNodePtr = inputNode->get_input_node_shared_ptr(0); + if (!ngraph::is_type(nextNodePtr)) { + inputNode = nextNodePtr; + } + } + auto it = compareMap.find(inputNode->get_type_info()); + ASSERT_NE(it, compareMap.end()); + it->second(inputNode, i, expected[j], actual[j], 1e-4f, 1e-4f); + } + } + }; + + { + const ov::Shape kv_cache_size_initial = {batch, n_heads, cache_size, n_features}; + const ov::Shape new_token_size_initial = {batch, context_size, n_heads, n_features}; + const ov::Shape matmul_in_size_initial = {batch, n_heads, context_size, context_size}; + + auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size_initial); + auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_initial); + + auto kv_cache_input = infer_request.get_tensor(input0); + kv_cache_input.set_shape(kv_cache_size_initial); + + auto ref_model = model->clone(); + ngraph::helpers::resize_function(ref_model, {kv_cache_input.get_shape(), new_token_data.get_shape(), matmul_data.get_shape()}); + auto results = ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_input}, {input1, new_token_data}, {input2, matmul_data}}); + + infer_request.set_tensor(input0, kv_cache_input); + infer_request.set_tensor(input1, new_token_data); + infer_request.set_tensor(input2, matmul_data); + + infer_request.infer(); + + compare_tensors(results, {infer_request.get_tensor(output0), infer_request.get_tensor(output1)}); + + cache_size += context_size; + } + + const size_t input_tokens = 1; + const size_t niters = 10; + const ov::Shape new_token_size = {batch, input_tokens, n_heads, n_features}; + size_t context_length = cache_size + input_tokens; + for (size_t i = 0; i < niters; i++, context_length += input_tokens) { + ov::Shape matmul_in_size_loop = {batch, n_heads, input_tokens, context_length}; + auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size); + auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_loop); + + auto kv_cache_input = infer_request.get_tensor(output0); + auto kv_shape = kv_cache_input.get_shape(); + + auto ref_model = model->clone(); + ngraph::helpers::resize_function(ref_model, {kv_shape, new_token_data.get_shape(), matmul_data.get_shape()}); + auto results = ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_input}, {input1, new_token_data}, {input2, matmul_data}}); + + auto new_token_input = infer_request.get_tensor(input1); + new_token_input.set_shape(new_token_data.get_shape()); + auto matmul_input = infer_request.get_tensor(input2); + matmul_input.set_shape(matmul_data.get_shape()); + + new_token_data.copy_to(new_token_input); + matmul_data.copy_to(matmul_input); + + infer_request.set_tensor(input0, kv_cache_input); + infer_request.set_tensor(input1, new_token_input); + infer_request.set_tensor(input2, matmul_input); + + infer_request.infer(); + + compare_tensors(results, {infer_request.get_tensor(output0), infer_request.get_tensor(output1)}); + } +} + +} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index 313015da3406ba..75bdb9f0ec71a7 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -2,19 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ov_models/builders.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/matmul.hpp" #include "shared_test_classes/base/layer_test_utils.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "transformations/rt_info/decompression.hpp" -using namespace ngraph; +using namespace ov; using namespace ov::test; namespace SubgraphTestsDefinitions { /* - * Subtract_const(U8) + * Subtract_const(U8/NF4/U4) * / - * Weights(U8) Convert(F32) + * Weights(U8/NF4/U4) Convert(F32) * | / * Convert(F32) Reshape(optional) * \ / Multiply_const(F32) @@ -29,7 +31,20 @@ namespace SubgraphTestsDefinitions { * | * Bias */ -using MatmulWeightsDecompressionParams = std::tuple, // input shapes + +struct ShapeParams { + ShapeParams() = default; + ShapeParams(InputShape data_shape, ov::Shape weights_shape, int weights_group_size = -1) + : data_shape(std::move(data_shape)), + weights_shape(std::move(weights_shape)), + weights_group_size(weights_group_size) {} + + InputShape data_shape; + ov::Shape weights_shape; + // Decompression group size. If the value is equal to -1, ordinary decompression is used + int weights_group_size; +}; +using MatmulWeightsDecompressionParams = std::tuple, // class MatmulWeightsDecompression : public testing::WithParamInterface, public SubgraphBaseTest { public: static std::string get_test_case_name(testing::TestParamInfo obj) { - std::vector inputShapes; + ShapeParams shape_params; ov::test::ElementType weights_precision; ov::test::ElementType activations_precision; bool transpose; @@ -48,7 +63,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface additional_config; - std::tie(inputShapes, + std::tie(shape_params, weights_precision, activations_precision, transpose, @@ -57,20 +72,9 @@ class MatmulWeightsDecompression : public testing::WithParamInterface init_subgraph(std::vector& inputShapes, - const ov::element::Type data_precision, - const ov::element::Type weights_precision, - const bool transpose_weights, - const bool add_subtract, - const bool reshape_on_decompression) { - ov::ParameterVector params{std::make_shared(data_precision, inputShapes[0])}; + std::shared_ptr init_subgraph(const ov::PartialShape& data_shape, + const ov::Shape& weights_shape, + const int group_size, + const ov::element::Type data_precision, + const ov::element::Type weights_precision, + const bool transpose_weights, + const bool add_subtract, + const bool reshape_on_decompression) { + ov::ParameterVector params{std::make_shared(data_precision, data_shape)}; + const auto weights_subgraph = init_compressed_weights_subgraph(weights_shape, + group_size, + data_precision, + weights_precision, + transpose_weights, + add_subtract, + reshape_on_decompression); + + auto mat_mul = std::make_shared(params[0], weights_subgraph); + return std::make_shared(NodeVector{mat_mul}, params, "MatmulWeightsDecompression"); + } + + std::shared_ptr init_compressed_weights_subgraph(const ov::Shape& weights_shape, + const int group_size, + const ov::element::Type data_precision, + const ov::element::Type weights_precision, + const bool transpose_weights, + const bool add_subtract, + const bool reshape_on_decompression_constant) { auto transpose_if_necessary = [&](const ov::Shape& shape) { - if (!transpose_weights) - return shape; - auto transposed_shape = shape; - std::swap(*transposed_shape.rbegin(), *(transposed_shape.rbegin() + 1)); - return transposed_shape; + auto result_shape = shape; + if (transpose_weights) + std::swap(*result_shape.rbegin(), *(result_shape.rbegin() + 1)); + return result_shape; }; - auto weights_shape = transpose_if_necessary(inputShapes[1].to_shape()); - auto weights = ngraph::builder::makeConstant(weights_precision, weights_shape, {}, true); + const bool group_decompression = group_size != -1; + // Weights has shape [I, O], where + // I - input channels + // O - output channels + // In case of group decompression, input channels dimension is split into 2: I -> [N, G], where + // N - number of groups + // G - group size + auto transformed_weights_shape = transpose_if_necessary(weights_shape); + if (group_decompression) { + OPENVINO_ASSERT(weights_shape[0] % group_size == 0, + "Weights output channels count (", + weights_shape[0], + ") must be divisible by decompression group size (", + group_size, + ")."); + auto in_channel_idx = transpose_weights ? transformed_weights_shape.size() - 1 : transformed_weights_shape.size() - 2; + transformed_weights_shape[in_channel_idx] = weights_shape[0] / group_size; + transformed_weights_shape.insert(transformed_weights_shape.begin() + in_channel_idx + 1, group_size); + } + auto weights_tensor = ov::test::utils::create_and_fill_tensor(weights_precision, transformed_weights_shape); + auto weights = std::make_shared(weights_tensor); weights->set_friendly_name("Compressed_weights"); auto weights_convert = std::make_shared(weights, data_precision); std::shared_ptr mul_parent = weights_convert; - auto output_channels = transpose_weights ? *(weights_shape.rbegin() + 1) : *weights_shape.rbegin(); - auto scaleshift_target_shape = transpose_if_necessary(ov::Shape{1, output_channels}); - auto scaleshift_const_shape = reshape_on_decompression ? ov::Shape{output_channels} : scaleshift_target_shape; + auto output_channels = *weights_shape.rbegin(); + + // Decompression constants shape: + // Ordinary decompression: [O, 1] + // Group decompression: [O, N, 1] + ov::Shape scaleshift_target_shape{output_channels}; + scaleshift_target_shape.insert(scaleshift_target_shape.begin(), group_decompression ? weights_shape[0] / group_size : 1); + scaleshift_target_shape = transpose_if_necessary(scaleshift_target_shape); + if (group_decompression) { + auto in_channel_idx = transpose_weights ? scaleshift_target_shape.size() - 1 : scaleshift_target_shape.size() - 2; + scaleshift_target_shape.insert(scaleshift_target_shape.begin() + in_channel_idx + 1, 1); + } + + auto scaleshift_const_shape = scaleshift_target_shape; + if (reshape_on_decompression_constant) + scaleshift_const_shape.erase(std::remove(scaleshift_const_shape.begin(), scaleshift_const_shape.end(), 1), scaleshift_const_shape.end()); if (add_subtract) { - auto shift_const = ngraph::builder::makeConstant(weights_precision, scaleshift_const_shape, {}, true); + auto shift_tensor = ov::test::utils::create_and_fill_tensor(weights_precision, scaleshift_const_shape); + auto shift_const = std::make_shared(shift_tensor); std::shared_ptr shift_convert = std::make_shared(shift_const, data_precision); - if (reshape_on_decompression) { + if (reshape_on_decompression_constant) { auto shift_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto shift_reshape = std::make_shared(shift_convert, shift_reshape_const, false); shift_convert = shift_reshape; @@ -122,32 +179,36 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_convert, shift_convert); } - std::shared_ptr scale_const = ngraph::builder::makeConstant(data_precision, scaleshift_const_shape, {}, true); - if (reshape_on_decompression) { + auto scale_tensor = ov::test::utils::create_and_fill_tensor(data_precision, scaleshift_const_shape, 1, -0.5, 10000); + std::shared_ptr scale_const = std::make_shared(scale_tensor); + if (reshape_on_decompression_constant) { auto scale_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto scale_reshape = std::make_shared(scale_const, scale_reshape_const, false); scale_const = scale_reshape; } - auto multiply = std::make_shared(mul_parent, scale_const); + std::shared_ptr last_node = std::make_shared(mul_parent, scale_const); - std::shared_ptr matmul_weights = multiply; + if (group_decompression) { + auto reshape_target_shape = transpose_weights ? std::vector{-1, static_cast(weights_shape[0])} + : std::vector{static_cast(weights_shape[0]), -1}; + auto target_shape_node = ov::opset10::Constant::create(ov::element::i32, {reshape_target_shape.size()}, reshape_target_shape); + last_node = std::make_shared(last_node, target_shape_node, false); + } if (transpose_weights) { - const size_t rank = matmul_weights->get_output_partial_shape(0).size(); + const size_t rank = last_node->get_output_partial_shape(0).size(); std::vector order(rank); std::iota(order.begin(), order.end(), 0); std::swap(*order.rbegin(), *(order.rbegin() + 1)); auto transpose_constant = ov::opset10::Constant::create(ov::element::i32, {rank}, order); - auto transpose = std::make_shared(matmul_weights, transpose_constant); - matmul_weights = transpose; + last_node = std::make_shared(last_node, transpose_constant); } - auto matMul = builder::makeMatMul(params[0], matmul_weights); - return std::make_shared(NodeVector{matMul}, params, "MatmulWeightsDecompression"); + return last_node; } void SetUp() override { targetDevice = ov::test::utils::DEVICE_GPU; - std::vector inputShapes; + ShapeParams shape_params; ov::test::ElementType weights_precision; ov::test::ElementType activations_precision; bool transpose_weights; @@ -155,7 +216,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface additional_config; - std::tie(inputShapes, + std::tie(shape_params, weights_precision, activations_precision, transpose_weights, @@ -164,14 +225,47 @@ class MatmulWeightsDecompression : public testing::WithParamInterface 200) so fp16 representation & math error is larger than default threshold + if (weights_input_channels > 2048) { + abs_threshold = 4.0f; + } else { + abs_threshold = 1.0f; + } + } + } + + void generate_inputs(const std::vector& target_input_static_shapes) override { + inputs.clear(); + const auto& model_inputs = function->inputs(); + for (size_t i = 0; i < model_inputs.size(); ++i) { + const auto& model_input = model_inputs[i]; + ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(model_input.get_element_type(), + target_input_static_shapes[i], + 2, + -1, + 10000); + inputs.insert({model_input.get_node_shared_ptr(), tensor}); + } } - void checkResults() { + void check_results() { const auto& test_param = GetParam(); ov::test::ElementType weights_precision = std::get<1>(test_param); for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) { @@ -185,24 +279,20 @@ class MatmulWeightsDecompression : public testing::WithParamInterface activations_precisions = {ov::element::f32, ov::element::f16}; const std::vector weights_precisions = {ov::element::u8}; -const std::vector> input_shapes_basic = { - {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {{}, {{16, 32}}}}, - {{{}, {{10, 40, 496}}}, {{}, {{1, 496, 240}}}}, - {{{}, {{1, 4, 48}}}, {{}, {{48, 256}}}}, - {{{}, {{11, 339, 377}}}, {{}, {{377, 335}}}}, - {{{}, {{1, 4, 32}}}, {{}, {{32, 256}}}}, - {{{}, {{1, 4, 512}}}, {{}, {{512, 256}}}}, - {{{}, {{1, 16, 32}}}, {{}, {{32, 64}}}}, - {{{}, {{2, 4, 32}}}, {{}, {{32, 65}}}}, - {{{}, {{3, 12, 768}}}, {{}, {{768, 1024}}}}, - {{{}, {{11, 339, 577}}}, {{}, {{577, 335}}}}, +const std::vector input_shapes_basic = { + {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, + {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, + {{{}, {{1, 4, 16}}}, {1, 16, 32}}, + {{{}, {{10, 40, 496}}}, {1, 496, 240}}, + {{{}, {{1, 4, 48}}}, {48, 256}}, + {{{}, {{11, 339, 377}}}, {377, 335}} }; INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, @@ -216,15 +306,16 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, ::testing::Values(std::map())), MatmulWeightsDecompression::get_test_case_name); -const std::vector> input_shapes_corner_cases_basic = { - {{{-1, -1, -1}, {{1, 4, 16}}}, {{}, {{1, 16, 32}}}}, - {{{}, {{1, 4, 16}}}, {{}, {{1, 16, 32}}}}, - {{{-1, -1, -1}, {{1, 4, 16}}}, {{}, {{16, 32}}}}, - {{{-1, -1, -1, -1}, {{1, 1, 4, 16}}}, {{}, {{1, 1, 16, 32}}}}, - {{{}, {{1, 1, 4, 16}}}, {{}, {{1, 1, 16, 32}}}}, +const std::vector input_shapes_corner_cases_basic = { + {{{-1, -1, -1}, {{1, 4, 16}}}, {1, 16, 32}}, + {{{-1, -1, -1}, {{1, 4, 16}}}, {16, 32}}, + {{{-1, -1, 16}, {{1, 4, 16}}}, {16, 32}, 4}, }; -const std::vector> input_shapes_corner_cases_big = { - {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {{}, {{1, 480, 256}}}}, +const std::vector input_shapes_corner_cases_big = { + {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {1, 480, 256}}, + {{{-1, -1, -1}, {{1, 1, 4096}}}, {4096, 4096}, 128}, + {{{-1, -1, -1}, {{1, 1, 4096}}}, {4096, 4096}}, + {{{-1, 4096}, {{1, 4096}}}, {4096, 4096}, 128}, }; const std::vector transpose_weights = {true, false}; @@ -242,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic, ::testing::Values(std::map{})), MatmulWeightsDecompression::get_test_case_name); -INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_big, +INSTANTIATE_TEST_SUITE_P(MatMulCompressedWeights_corner_cases_big, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_big), ::testing::ValuesIn(weights_precisions), diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp new file mode 100644 index 00000000000000..8c7de510531348 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp @@ -0,0 +1,316 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ov_models/utils/ov_helpers.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_constants.hpp" +#include "shared_test_classes/base/utils/ranges.hpp" +#include +#include "shared_test_classes/base/utils/compare_results.hpp" +#include "openvino/pass/constant_folding.hpp" +#include + +using namespace InferenceEngine; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +using DynamicShapeLoopParams = typename std::tuple< + bool, + std::tuple< + bool, + int64_t, + int64_t, + int64_t + >, + int64_t, + InputShape, + InferenceEngine::Precision, + std::string, + ov::AnyMap + >; + +/** + * Test case with Dynamic SHAPE version of loop operation. + * Total iteration count is dynamic. + */ +class DynamicShapeLoopTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + bool static_iter_num; + bool static_continue_cond; + int64_t max_iter_num; + int64_t dynamic_exit; + int64_t axis; + int64_t start_value; + InputShape data_shapes; + InferenceEngine::Precision data_prc; + std::string targetDevice; + auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis); + ov::Any configuration; + std::tie( + static_continue_cond, + args_pack, + start_value, + data_shapes, + data_prc, + targetDevice, + configuration) = obj.param; + + std::ostringstream result; + result << "static_iter_num=" << std::to_string(static_iter_num) << "_"; + result << "static_continue_cond=" << std::to_string(static_continue_cond) << "_"; + result << "max_iter_num=" << std::to_string(max_iter_num) << "_"; + result << "dynamic_exit=" << std::to_string(dynamic_exit) << "_"; + result << "axis=" << std::to_string(axis) << "_"; + result << "start_value=" << std::to_string(start_value) << "_"; + result << "max_iter_num=" << std::to_string(max_iter_num) << "_"; + result << "IS=("; + result << ov::test::utils::partialShape2str({data_shapes.first}) << "_"; + for (size_t i = 0lu; i < data_shapes.second.size(); i++) { + result << "{"; + result << ov::test::utils::vec2str(data_shapes.second[i]) << "_"; + result << "}_"; + } + result << ")_"; + result << "netPRC=" << data_prc << "_"; + result << "targetDevice=" << targetDevice << "_"; + + auto res_str = result.str(); + std::replace(res_str.begin(), res_str.end(), '-', '_'); + return res_str; + } + +private: + bool static_iter_num; // trip count provided by constant node + bool static_continue_cond; // initial_cond provided by constant node + int64_t max_iter_num; // -1 means infinity loop (expected dynamic exit condition in body) + int64_t dynamic_exit; // -1 means always true + int64_t axis; // -1 means no auto concatenation + int64_t start_value; + InputShape data_shapes; + InferenceEngine::Precision data_prc; + +protected: + void SetUp() override { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis); + std::tie( + static_continue_cond, + args_pack, + start_value, + data_shapes, + data_prc, + targetDevice, + configuration) = GetParam(); + + const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc); + const auto inputShape = data_shapes.first; + const auto scalarShape = ngraph::Shape{}; + init_input_shapes({data_shapes}); + + ngraph::ParameterVector params{}; + auto cond_input_create = [¶ms] (ngraph::element::Type prc, const ov::PartialShape &shape, int value = 0, bool is_static = false) + -> std::shared_ptr { + if (is_static) + return std::make_shared(prc, shape.to_shape(), value); + + auto input = std::make_shared(prc, shape); + params.push_back(input); + return input; + }; + + auto start = cond_input_create(prc, inputShape); + start->set_friendly_name("start"); + auto count = cond_input_create(ngraph::element::i64, scalarShape, max_iter_num, static_iter_num); + count->set_friendly_name("count"); + auto skip = cond_input_create(ngraph::element::boolean, scalarShape, true, static_continue_cond); + skip->set_friendly_name("skip"); + + // + // count skip start count skip start + // / / + // ___*___*____ __________*___*____ | idx | data | out | + // | idx in | | ex_val idx in | | 0 | 7 | 7 | + // | | / | | | / | / | | 1 | 7 | 8 | + // | add | | less add | | 2 | 8 | 10 | + // | | true | | | | | | 3 | 10 | 13 | + // | | | | | | | | ~~~~~ * * * ~~~~~ + // | out cnd | | cnd out | + // |___*____*___| |____*_____*________| + // Full loop Dynamic exit loop + // n_iter = count n_iter = ex_val + // + auto b_indx = std::make_shared(ngraph::element::i64, ngraph::Shape{}); + b_indx->set_friendly_name("body_index"); + auto b_data = std::make_shared(prc, inputShape); + b_data->set_friendly_name("body_data"); + auto b_indx_cast = std::make_shared(b_indx, prc); + b_indx_cast->set_friendly_name("body_index_cast"); + auto b_add = std::make_shared(b_data, b_indx_cast); + b_add->set_friendly_name("body_addition"); + + std::shared_ptr b_cond; + if (dynamic_exit == -1) { + b_cond = std::make_shared(ngraph::element::boolean, ngraph::Shape{}, true); + b_cond->set_friendly_name("body_condition"); + } else { + auto b_exit_value = std::make_shared(ngraph::element::i64, scalarShape, dynamic_exit); + b_exit_value->set_friendly_name("body_exit_value"); + b_cond = std::make_shared(b_indx, b_exit_value); + b_cond->set_friendly_name("body_condition_with_exit_value"); + } + + auto body = std::make_shared( + ngraph::OutputVector {b_cond, b_add}, // TODO: check with reverse + ngraph::ParameterVector {b_indx, b_data}); // TODO: check with reverse + body->set_friendly_name("body_network"); + + auto loop = std::make_shared(count, skip); + loop->set_friendly_name("loop"); + loop->set_function(body); + loop->set_special_body_ports({0, 0}); + loop->set_merged_input(b_data, start, b_add); + if (axis == -1) + loop->get_iter_value(b_add, -1); + else + loop->get_concatenated_slices(b_add, 0, 1, 1, -1, axis); + + function = std::make_shared( + ngraph::OutputVector {loop}, + params); + function->set_friendly_name("outer_body_network"); + } +}; + + +TEST_P(DynamicShapeLoopTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::I32 +}; + +ov::AnyMap netConfigurations = { + {GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO} +}; + +static const std::vector> dynamic_loop_types_axis_0 { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 10, -1, 0 }, // n_iter 10, no dynamic exit +}; + +std::vector inputs_0 = { + InputShape(ov::PartialShape({1, -1, 2}), {{1, 4, 2}, {1, 5, 2}, {1, 10, 2}}), +}; + +INSTANTIATE_TEST_SUITE_P(smoke_DynamicShapeLoop_axis_0, DynamicShapeLoopTest, + testing::Combine( + /* static_continue_cond */ testing::Values(true), + /* args_pack */ testing::ValuesIn(dynamic_loop_types_axis_0), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_0), + /* data_prc */ testing::ValuesIn(netPrecisions), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU), + /* configuration */ testing::Values(netConfigurations)), + DynamicShapeLoopTest::getTestCaseName); + +static const std::vector> dynamic_loop_types_1 { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 5, -1, 1 }, // n_iter 5, no dynamic exit +}; + +std::vector inputs_1 = { + InputShape(ov::PartialShape({-1, 1, 4, -1}), {{2, 1, 4, 10}, {3, 1, 4, 14}, {6, 1, 4, 16}}), +}; + +INSTANTIATE_TEST_SUITE_P(smoke_DynamicShapeLoop_axis_1, DynamicShapeLoopTest, + testing::Combine( + /* static_continue_cond */ testing::Values(true), + /* args_pack */ testing::ValuesIn(dynamic_loop_types_1), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_1), + /* data_prc */ testing::ValuesIn(netPrecisions), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU), + /* configuration */ testing::Values(netConfigurations)), + DynamicShapeLoopTest::getTestCaseName); + +static const std::vector> dynamic_loop_types_2 { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 10, -1, 2 }, // n_iter 10, no dynamic exit +}; + +std::vector inputs_2 = { + InputShape(ov::PartialShape({-1, -1, 1, 6}), {{2, 4, 1, 6}, {10, 40, 1, 6}, {12, 16, 1, 6}}), +}; + +INSTANTIATE_TEST_SUITE_P(smoke_DynamicShapeLoop_axis_2, DynamicShapeLoopTest, + testing::Combine( + /* static_continue_cond */ testing::Values(true), + /* args_pack */ testing::ValuesIn(dynamic_loop_types_2), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_2), + /* data_prc */ testing::ValuesIn(netPrecisions), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU), + /* configuration */ testing::Values(netConfigurations)), + DynamicShapeLoopTest::getTestCaseName); + +static const std::vector> dynamic_loop_types_no_auto_concat { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 10, -1, -1 }, // n_iter 5, no dynamic exit +}; + +std::vector inputs_no_auto_concat = { + InputShape(ov::PartialShape({-1, 1, 6}), {{2, 1, 6}, {10, 1, 6}, {12, 1, 6}}), +}; + +INSTANTIATE_TEST_SUITE_P(smoke_DynamicShapeLoop_no_auto_concat, DynamicShapeLoopTest, + testing::Combine( + /* static_continue_cond */ testing::Values(true), + /* args_pack */ testing::ValuesIn(dynamic_loop_types_no_auto_concat), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_no_auto_concat), + /* data_prc */ testing::ValuesIn(netPrecisions), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU), + /* configuration */ testing::Values(netConfigurations)), + DynamicShapeLoopTest::getTestCaseName); + +static const std::vector> dynamic_loop_types_dynamic_exit { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3 + std::tuple{ true , 5, 7, 1 }, // n_iter 5, dynamic exit not reached + std::tuple{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5 +}; + +std::vector inputs_dynamic_exit = { + InputShape(ov::PartialShape({-1, 1, 2}), {{4, 1, 2}, {10, 1, 2}, {12, 1, 2}}), +}; + +INSTANTIATE_TEST_SUITE_P(smoke_DynamicShapeLoop_dynamic_exit, DynamicShapeLoopTest, + testing::Combine( + /* static_continue_cond */ testing::Values(true), + /* args_pack */ testing::ValuesIn(dynamic_loop_types_dynamic_exit), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_dynamic_exit), + /* data_prc */ testing::ValuesIn(netPrecisions), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU), + /* configuration */ testing::Values(netConfigurations)), + DynamicShapeLoopTest::getTestCaseName); + +} // namespace GPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_gpu/tests/unit/CMakeLists.txt b/src/plugins/intel_gpu/tests/unit/CMakeLists.txt index 3dda088627b833..1230e57effbd74 100644 --- a/src/plugins/intel_gpu/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_gpu/tests/unit/CMakeLists.txt @@ -23,14 +23,12 @@ file(GLOB_RECURSE SOURCES_MAIN ) if (NOT ENABLE_ONEDNN_FOR_GPU) - set(EXCLUDE_DIR "/onednn/") - foreach (SOURCE_FILE ${SOURCES_MAIN}) - string (FIND ${SOURCE_FILE} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) - if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1) - message (Exclude : ${SOURCE_FILE}) + set(EXCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/onednn/") + foreach (SOURCE_FILE IN LISTS SOURCES_MAIN) + if (SOURCE_FILE MATCHES "${EXCLUDE_DIR}.*") list (REMOVE_ITEM SOURCES_MAIN ${SOURCE_FILE}) endif () - endforeach(SOURCE_FILE) + endforeach() endif() if (MSVC) diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp index aa5934a710f196..df20a50e33b9b2 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp @@ -625,6 +625,12 @@ TEST_P(conv_fp32_activation, basic) { reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -635,9 +641,9 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation, ::testing::ValuesIn( convolution_test_params{ CASE_CONV_FP32_3, 2, 2, 3 }, convolution_test_params{ CASE_CONV_FP32_4, 2, 2, 3 }, - convolution_test_params{ CASE_CONV_FP16_4, 2, 2, 3 }, - convolution_test_params{ CASE_CONV_FP16_4, 2, 2, 3 }, - convolution_test_params{ CASE_CONV_FP16_4, 2, 2, 3 }, + convolution_test_params{ CASE_CONV_FP16_1, 2, 2, 3 }, + convolution_test_params{ CASE_CONV_FP16_2, 2, 2, 3 }, + convolution_test_params{ CASE_CONV_FP16_3, 2, 2, 3 }, convolution_test_params{ CASE_CONV_FP16_4, 2, 2, 3 }, })); @@ -655,6 +661,12 @@ TEST_P(conv_fp32_scale, basic) { reorder("reorder_bfyx", input_info("scale"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -685,6 +697,12 @@ TEST_P(conv_fp32_bias, basic) { reorder("reorder_bfyx", input_info("add_bias"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -700,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_bias, ::testing::ValuesIn(std::v convolution_test_params{ CASE_CONV_FP16_2, 2, 2, 3 }, convolution_test_params{ CASE_CONV_FP16_3, 2, 2, 3 }, convolution_test_params{ CASE_CONV_FP16_4, 2, 2, 3 }, - convolution_test_params{ CASE_CONV_FP16_10, 2, 2, 3 }, + // convolution_test_params{ CASE_CONV_FP16_10, 2, 2, 3 }, // Issue: 94154 })); class conv_fp32_double_bias : public ConvFusingTest {}; @@ -800,6 +818,12 @@ TEST_P(conv_fp32_prelu_eltwise, basic_sum) { ); tolerance = default_tolerance(p.data_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 2; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -818,6 +842,12 @@ TEST_P(conv_fp32_prelu_eltwise, basic_sum_slope_2) { ); tolerance = default_tolerance(p.data_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 2; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -835,6 +865,12 @@ TEST_P(conv_fp32_prelu_eltwise, basic_prod) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.data_type); execute(p); } @@ -854,6 +890,12 @@ TEST_P(conv_fp32_prelu_eltwise, basic_prod_slope_2) { ); tolerance = default_tolerance(p.data_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 4; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -873,6 +915,12 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) { ); tolerance = default_tolerance(p.data_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 2; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -891,6 +939,12 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum_slope_2) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.data_type); execute(p); } @@ -911,6 +965,12 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) { ); tolerance = default_tolerance(p.data_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 4; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -929,6 +989,12 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod_slope_2) { reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.data_type); execute(p); } @@ -1129,6 +1195,9 @@ TEST_P(conv_fp32_multi_eltwise_4_clamp, basic) { cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); + if (p.default_type == data_types::f16) { + tolerance *= 4.f; // Issue: 94154 + } execute(p); } @@ -1168,6 +1237,9 @@ TEST_P(conv_fp32_eltwise_fusing_extend_ops, pattern01_simple_sub) { cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); tolerance = default_tolerance(p.default_type); + if (p.default_type == data_types::f16) { + tolerance *= 8.f; // Issue: 94154 + } execute(p); } @@ -1335,7 +1407,16 @@ TEST_P(conv_fp32_multi_eltwise_quantization, basic) { reorder("reorder_bfyx", input_info("eltwise2"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = 1.f; + if (p.default_type == data_types::f16) { + tolerance *= 8.f; // Issue: 94154 + } execute(p); } @@ -1420,7 +1501,15 @@ TEST_P(conv_fp32_swish, basic) { reorder("reorder_bfyx", input_info("mul"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); + if (p.default_type == data_types::f16) { + tolerance *= 3.f; // Issue: 94154 + } execute(p); } @@ -1784,6 +1873,11 @@ TEST_P(conv_swap_xy_with_eltwise_diff_sizes, basic) { reorder("reorder_bfyx", input_info("sum"), p.default_format, data_types::f16) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -3329,6 +3423,10 @@ TEST_P(conv_gen9_common_conv_fwd_data_1stconv, basic) { ); tolerance = default_tolerance(p.default_type); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16) { + tolerance *= 2; // Issue: 94154 + } execute(p); } @@ -3438,6 +3536,12 @@ TEST_P(conv_fp32_activation_abs_onednn, basic) { reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -3462,6 +3566,12 @@ TEST_P(conv_fp32_activation_mish_onednn, basic) { ); tolerance = default_tolerance(p.default_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 4; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -3484,6 +3594,12 @@ TEST_P(conv_fp32_activation_swish_onednn, basic) { reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -3508,6 +3624,12 @@ TEST_P(conv_fp32_activation_hswish_onednn, basic) { ); tolerance = default_tolerance(p.default_type); + if (engine.get_device_info().supports_immad && p.default_type == data_types::f16) { + tolerance *= 8; + if (p.weights_format == format::gs_oiyx_gsv16) { + GTEST_SKIP(); // Issue: 94154 + } + } execute(p); } @@ -3530,6 +3652,11 @@ TEST_P(conv_fp32_activation_exp_onednn, basic) { reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = default_tolerance(p.default_type); execute(p); } @@ -4440,6 +4567,8 @@ TEST_P(conv_after_permute_not_optimizing, basic) { if (!engine.get_device_info().supports_immad) return; + GTEST_SKIP(); // Issue: 94154 + auto p = GetParam(); create_topologies( diff --git a/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp index 5702b695d359a0..7dba6f2ca7e266 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp @@ -240,6 +240,13 @@ TEST_P(deconv_actv, basic) { activation("act", input_info("deconv"), activation_func::relu), reorder("out", input_info("act"), p.default_format, data_types::f32) ); + + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::is_os_yx_isv16_osv16) { + GTEST_SKIP(); // Issue: 94154 + } + // Need much higher tolerance because of deconvolution -> convolution optimization tolerance = 1.f; execute(p); @@ -335,6 +342,12 @@ TEST_P(deconv_bias, basic) { reorder("out", input_info("bias_add"), p.default_format, data_types::f32) ); + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::is_os_yx_isv16_osv16) { + GTEST_SKIP(); // Issue: 94154 + } + // Need much higher tolerance because of deconvolution -> convolution optimization tolerance = 1.f; execute(p); @@ -457,6 +470,13 @@ class deconv_actv_eltw_actv : public DeconvolutionFusingTest { activation("act2", input_info("eltw"), activation_func::relu), reorder("out", input_info("act2"), p.default_format, data_types::f32) ); + + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::is_os_yx_isv16_osv16) { + GTEST_SKIP(); // Issue: 94154 + } + // Need much higher tolerance because of deconvolution -> convolution optimization tolerance = 1.f; execute(p, is_caching_test); @@ -570,6 +590,12 @@ TEST_P(deconv_scale_actv_quant_i8, basic) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives++; + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + p.weights_format == format::is_os_yx_isv16_osv16) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = 1.f; execute(p); } @@ -681,6 +707,14 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) { input_info("out2_lo"), input_info("out2_hi"), 255, data_types::i8), reorder("out", input_info("quant2"), p.default_format, data_types::f32) ); + + if (engine.get_device_info().supports_immad && + p.default_type == data_types::f16 && + (p.weights_format == format::is_os_yx_isv16_osv16 || + p.weights_format == format::is_os_zyx_isv16_osv16)) { + GTEST_SKIP(); // Issue: 94154 + } + tolerance = 2.1f; execute(p); } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp index 50ddb9870e51e2..bf72af9c91837f 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp @@ -124,6 +124,9 @@ TEST_P(eltwise_quantize, u8) { ); tolerance = default_tolerance(data_types::i8); + if (p.default_type == data_types::f16 && p.default_format == format::b_fs_yx_fsv4) { + tolerance *= 2.f; // Issue: 94154 + } execute(p); } @@ -143,6 +146,9 @@ TEST_P(eltwise_quantize, i8_per_channel) { ); tolerance = default_tolerance(data_types::i8); + if (p.default_type == data_types::f16 && p.default_format == format::b_fs_yx_fsv4) { + tolerance *= 11.f; // Issue: 94154 + } execute(p); } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp index 0eaf6cbee32543..58694f91d4c3ab 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp @@ -121,7 +121,7 @@ class BaseFusingTest : public ::testing::TestWithParam { VF rnd_vec = rg.generate_random_1d(s.count(), min_random, max_random); set_values(prim, rnd_vec); } else if (l.data_type == data_types::f16) { - VF rnd_vec = rg.generate_random_1d(s.count(), -1, 1); + VF rnd_vec = rg.generate_random_1d(s.count(), -1, 1); set_values(prim, rnd_vec); } else { VF rnd_vec = rg.generate_random_1d(s.count(), -1, 1); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gather_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gather_fusion_test.cpp index 39022f9f7b870d..c1af88852dde09 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gather_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gather_fusion_test.cpp @@ -129,7 +129,7 @@ TEST_P(gather_quantize, basic) { data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), data("out_lo", get_mem(get_single_element_layout(p), -127)), data("out_hi", get_mem(get_single_element_layout(p), 127)), - gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.out_shape), + gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.dictionary_shape.size(), p.out_shape), quantize("quantize", input_info("gather_prim"), input_info("in_lo"), input_info("in_hi"), input_info("out_lo"), input_info("out_hi"), 255, data_types::i8), reorder("reorder_bfyx", input_info("quantize"), p.default_format, data_types::f32) @@ -172,7 +172,7 @@ TEST_P(gather_eltwise_activation, basic) { input_layout("input", get_input_layout(p)), data("gather_indices", get_mem(get_indices_layout(p), 0, static_cast(get_axis_dim(p) - 1))), data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)), - gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.out_shape), + gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.dictionary_shape.size(), p.out_shape), activation("activation", input_info("gather_prim"), activation_func::abs), eltwise("eltwise", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::prod), reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) @@ -220,7 +220,7 @@ TEST_P(gather_eltwise_activation_dynamic, basic) { input_layout("input", get_input_layout(p, true)), input_layout("gather_indices", layout{ ov::PartialShape::dynamic(p.indices_shape.size()), p.data_type, format::bfyx }), input_layout("eltwise_data", get_per_channel_layout(p, true)), - gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.out_shape), + gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.dictionary_shape.size(), p.out_shape), activation("activation", input_info("gather_prim"), activation_func::abs), eltwise("eltwise", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::prod), reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32) diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp index 19a9deccfc3e33..68d444094f331b 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp @@ -493,6 +493,9 @@ TEST_P(gemm_2in_act_scale_eltwise, basic) { ); tolerance = default_tolerance(p.default_type); + if (p.default_type == data_types::f16 && p.kernel_name == "gemm_tiled_opt") { + tolerance *= 2.1f; // Issue: 94154 + } execute(p, false); } @@ -511,6 +514,9 @@ TEST_P(gemm_2in_act_scale_eltwise, broadcast_eltwise) { ); tolerance = default_tolerance(p.default_type); + if (p.default_type == data_types::f16 && p.kernel_name == "gemm_tiled_opt") { + tolerance *= 2.1f; // Issue: 94154 + } execute(p, false); } diff --git a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp index 6e980ac7d25d8b..a3635bb320f47f 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/loop_fusion_test.cpp @@ -31,6 +31,35 @@ struct loop_params { size_t expected_not_fused_primitives; }; + +program::ptr build_program(engine& engine, + topology& body_topology, + primitive_id initial_condition_id, + std::vector output_primitive_maps, + std::vector back_edges) { + std::vector output_names_vec; + for (auto out_map : output_primitive_maps) { + output_names_vec.push_back(out_map.internal_id.pid); + } + + // setup outputs for backedges + for (auto& back_edge : back_edges) { + output_names_vec.push_back(back_edge.from); + } + + // if execution_condition_id is specified, we need to add the id in build_option::outputs + if (!initial_condition_id.empty()) { + output_names_vec.push_back(initial_condition_id); + } + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); + config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + + return program::build_program(engine, body_topology, config, false, false, true); +} + class LoopFusingTest : public ::BaseFusingTest { public: @@ -71,6 +100,8 @@ TEST_P(permute_eltwise_loop, basic) { std::vector output_primitive_maps {loop::io_primitive_map("loop", "body_eltwise", 2)}; std::vector back_edges {loop::backedge_mapping("body_eltwise", "body_eltwise_operand")}; + auto body_program = build_program(engine, body, "", output_primitive_maps, back_edges); + create_topologies( input_layout("input", get_input_layout(p)), data("eltwise_data", get_mem(layout{p.data_type, p.default_format, p.loop_input_shape})), @@ -80,7 +111,7 @@ TEST_P(permute_eltwise_loop, basic) { data("trip_count", trip_count_mem), data("initial_condition", initial_condition_mem), mutable_data("num_iteration", num_iteration_mem), - loop("loop", { input_info("eltwise"), input_info("loop_eltwise_init_values") }, body, + loop("loop", { input_info("num_iteration"), input_info("eltwise"), input_info("loop_eltwise_init_values") }, body_program, "trip_count", "initial_condition", "num_iteration", input_primitive_maps, output_primitive_maps, back_edges, p.loop_trip_count), reorder("output", input_info("loop"), format::bfyx, p.default_type) diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp index 11d769f322be93..7fafb55beeccb8 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp @@ -89,11 +89,11 @@ TEST(primitive_comparison, fully_connected) { } TEST(primitive_comparison, gather) { - auto gather_prim = gather("gather", input_info("input0"), input_info("input1"), 2, {1, 3, 224, 224}, 1, true); - auto gather_prim_eq = gather("gather_eq", input_info("input0_eq"), input_info("input1_eq"), 2, {1, 3, 224, 224}, 1, true); - auto gather_prim_axis = gather("gather", input_info("input0"), input_info("input1"), 3, {1, 3, 224, 224}, 1, true); - auto gather_prim_batch_dim = gather("gather", input_info("input0"), input_info("input1"), 2, {1, 3, 224, 224}, 2, true); - auto gather_prim_support_neg_ind = gather("gather", input_info("input0"), input_info("input1"), 2, {1, 3, 224, 224}, 1, false); + auto gather_prim = gather("gather", input_info("input0"), input_info("input1"), 2, {}, {1, 3, 224, 224}, 1, true); + auto gather_prim_eq = gather("gather_eq", input_info("input0_eq"), input_info("input1_eq"), 2, {}, {1, 3, 224, 224}, 1, true); + auto gather_prim_axis = gather("gather", input_info("input0"), input_info("input1"), 3, {}, {1, 3, 224, 224}, 1, true); + auto gather_prim_batch_dim = gather("gather", input_info("input0"), input_info("input1"), 2, {}, {1, 3, 224, 224}, 2, true); + auto gather_prim_support_neg_ind = gather("gather", input_info("input0"), input_info("input1"), 2, {}, {1, 3, 224, 224}, 1, false); ASSERT_EQ(gather_prim, gather_prim_eq); ASSERT_NE(gather_prim, gather_prim_axis); diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp index e6c78b4fd8513e..5e5caa91cd7d3f 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/shape_predictor_test.cpp @@ -25,10 +25,10 @@ TEST_P(shape_predictor_tests, prediction) { ShapePredictor sp(&engine, p.buffers_preallocation_ratio); std::pair result; - const auto dt_size = 4; + const auto dt_bitwidth = ov::element::f32.bitwidth(); for (auto& shape : in_shapes) - result = sp.predict_preallocation_shape("dummy_name", shape, dt_size, p.can_reuse_buffer); + result = sp.predict_preallocation_shape("dummy_name", shape, dt_bitwidth, p.can_reuse_buffer); ASSERT_TRUE(result.first == !expected_predicted_shape.empty()); ASSERT_EQ(result.second, expected_predicted_shape); diff --git a/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp index 4ac5f414c70664..629769d086d9d1 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp @@ -41,11 +41,11 @@ TEST(add_required_reorders, input_reorder_inside_shape_of_subgraph) { topology.add(data("data_0", data_0)); topology.add(data("data_1", data_1)); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - topology.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true)); + topology.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, {}, 0, true)); topology.add(eltwise("eltwise0", {input_info("gather0"), input_info("data_1")}, eltwise_mode::prod, data_types::f32)); topology.add(reshape("reshape0", input_info("eltwise0"), false, {}, ov::PartialShape{1}, reshape::reshape_mode::unsqueeze)); - topology.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true)); + topology.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, {}, 0, true)); topology.add(eltwise("eltwise1", {input_info("gather1"), input_info("data_1")}, eltwise_mode::prod, data_types::f32)); topology.add(reshape("reshape1", input_info("eltwise1"), false, {}, ov::PartialShape{1}, reshape::reshape_mode::unsqueeze)); diff --git a/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp index 4fe7598e8af3d8..6b66075c6db26d 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/mark_shape_of_subgraphs_test.cpp @@ -62,7 +62,7 @@ TEST(mark_shape_of_subgraphs, simple_chain) { topology.add(data("data_0", data_0)); topology.add(data("data_1", data_1)); topology.add(shape_of("shape_of", input_info("input"), data_types::i64)); - topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, {})); + topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, 0, {})); topology.add(eltwise("eltwise", input_info("gather"), input_info("data_1"), eltwise_mode::sum)); topology.add(concatenation("concat", {input_info("eltwise"), input_info("data_1")}, 0)); topology.add(broadcast("broadcast", input_info("input"), input_info("concat"), {}, ov::op::BroadcastType::BIDIRECTIONAL)); @@ -103,7 +103,7 @@ TEST(mark_shape_of_subgraphs, simple_chain_w_reshape_inside_subgraph) { topology.add(data("data_0", data_0)); topology.add(data("data_1", data_1)); topology.add(shape_of("shape_of", input_info("input"), data_types::i64)); - topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, {1})); + topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, 1, {1})); topology.add(reshape("reshape", input_info("gather"), input_info("data_1"), false, ov::PartialShape{2})); topology.add(broadcast("broadcast", input_info("input"), input_info("reshape"), {}, ov::op::BroadcastType::BIDIRECTIONAL)); @@ -129,8 +129,8 @@ TEST(mark_shape_of_subgraphs, parallel_shape_of_subgraphs) { topology.add(data("data_0", data_0)); topology.add(shape_of("shape_of_0", input_info("input"), data_types::i64)); topology.add(shape_of("shape_of_1", input_info("input"), data_types::i64)); - topology.add(gather("gather_0", input_info("shape_of_0"), input_info("data_0"), 0, {})); - topology.add(gather("gather_1", input_info("shape_of_1"), input_info("data_0"), 0, {})); + topology.add(gather("gather_0", input_info("shape_of_0"), input_info("data_0"), 0, 0, {})); + topology.add(gather("gather_1", input_info("shape_of_1"), input_info("data_0"), 0, 0, {})); topology.add(eltwise("eltwise", input_info("gather_0"), input_info("gather_1"), eltwise_mode::sum)); topology.add(reshape("reshape", input_info("input"), input_info("eltwise"), false, ov::PartialShape())); @@ -160,9 +160,9 @@ TEST(mark_shape_of_subgraphs, parallel_shape_of_subgraphs_cascade) { topology.add(data("data_1", data_1)); topology.add(data("data_2", data_2)); topology.add(shape_of("shape_of_0", input_info("input"), data_types::i64)); - topology.add(gather("gather_0", input_info("shape_of_0"), input_info("data_0"), 0, {1})); + topology.add(gather("gather_0", input_info("shape_of_0"), input_info("data_0"), 0, 1, {1})); topology.add(shape_of("shape_of_1", input_info("input"), data_types::i64)); - topology.add(gather("gather_1", input_info("shape_of_1"), input_info("data_0"), 0, {1})); + topology.add(gather("gather_1", input_info("shape_of_1"), input_info("data_0"), 0, 1, {1})); topology.add(scatter_update("scatter_update_0", input_info("gather_0"), input_info("data_0"), input_info("data_0"), 0)); topology.add(scatter_update("scatter_update_1", input_info("gather_1"), input_info("data_0"), input_info("data_0"), 0)); topology.add(strided_slice("strided_slice_1", @@ -171,7 +171,7 @@ TEST(mark_shape_of_subgraphs, parallel_shape_of_subgraphs_cascade) { input_info("scatter_update_1"), input_info("data_0"), {}, {}, {}, {}, {}, {})); topology.add(shape_of("shape_of_2", input_info("input"), data_types::i64)); - topology.add(gather("gather_2", input_info("shape_of_2"), input_info("data_0"), 0, {})); + topology.add(gather("gather_2", input_info("shape_of_2"), input_info("data_0"), 0, 0, {})); topology.add(scatter_update("scatter_update_2", input_info("gather_2"), input_info("data_0"), input_info("data_0"), 0)); topology.add(strided_slice("strided_slice_2", input_info("data_1"), @@ -207,7 +207,7 @@ TEST(mark_shape_of_subgraphs, simple_chain_w_inserted_reorder) { topology.add(input_layout("input", input_layout_dynamic)); topology.add(data("data_0", data_0)); topology.add(shape_of("shape_of", input_info("input"), data_types::i64)); - topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, {1})); + topology.add(gather("gather", input_info("shape_of"), input_info("data_0"), 0, 1, {1})); topology.add(reshape("reshape", input_info("gather"), true, {}, {})); topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f16)); topology.add(eltwise("eltwise", input_info("reorder"), input_info("data_0"), eltwise_mode::prod)); @@ -237,7 +237,7 @@ TEST(mark_shape_of_subgraphs, concat_with_empty_tensor_inputs) { topology.add(input_layout("input_empty", input_layout_empty)); topology.add(data("data_0", data_0)); topology.add(shape_of("shape_of_01", input_info("input"), data_types::i64)); - topology.add(gather("gather01", input_info("shape_of_01"), input_info("data_0"), 0, {1})); + topology.add(gather("gather01", input_info("shape_of_01"), input_info("data_0"), 0, 1, {1})); topology.add(shape_of("shape_of_02", input_info("input_empty"), data_types::i64)); topology.add(shape_of("shape_of_03", input_info("input_empty"), data_types::i64)); topology.add(concatenation("concat", {input_info("gather01"), input_info("shape_of_02"), input_info("shape_of_03")}, 0)); diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp index 1866ddb6c19870..9d7aef3e2b68bf 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp @@ -633,13 +633,13 @@ TEST(prepare_buffer_fusing, skip_in_place_concat_inside_shape_of_subgraph) { topology.add(data("data_1", data_1)); topology.add(data("data_2", data_2)); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - topology.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true)); + topology.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, 0, {}, 0, true)); topology.add(reorder("reorder0", input_info("gather0"), format::any, data_types::f32, std::vector(), reorder_mean_mode::subtract, padding(), true)); topology.add(eltwise("eltwise0", input_info("reorder0"), input_info("data_1"), eltwise_mode::prod, broadcast_spec)); topology.add(reshape("reshape0", input_info("eltwise0"), false, {}, ov::PartialShape{1}, reshape::reshape_mode::unsqueeze)); - topology.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true)); + topology.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, 0, {}, 0, true)); topology.add(reorder("reorder1", input_info("gather1"), format::any, data_types::f32, std::vector(), reorder_mean_mode::subtract, padding(), true)); topology.add(eltwise("eltwise1", input_info("reorder1"), input_info("data_1"), eltwise_mode::prod, broadcast_spec)); @@ -693,7 +693,7 @@ TEST(prepare_buffer_fusing, test_implicit_crop_and_outerpadding) { topology.add(input_layout("Input", in_input->get_layout())); topology.add(input_layout("Input_idx_1", input_idx1->get_layout())); topology.add(reorder("reorder_input", input_info("Input"), format::bfzyx, data_types::f32)); - topology.add(gather("gather1", input_info("reorder_input"), input_info("Input_idx_1"), axis, ov::Shape{1, 6, 2, 2, 2})); + topology.add(gather("gather1", input_info("reorder_input"), input_info("Input_idx_1"), axis, 5, ov::Shape{1, 6, 2, 2, 2})); topology.add(reorder("gather1_reorder", input_info("gather1"), reorder_layout)); topology.add(reshape("reshape1", input_info("gather1_reorder"), tensor(6, 2, 2, 2))); topology.add(crop("crop", input_info("reorder_input"), tensor{1, 6, 2, 2, 2}, tensor(1, 0, 0, 0, 0))); diff --git a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp index 1d5bdd88209668..ee454a74e96635 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/remove_redundant_reorders_tests.cpp @@ -346,13 +346,13 @@ TEST(remove_redundant_reorders, not_to_fuse_concat_with_reorder_inside_shape_of_ topology.add(data("data_1", data_1)); topology.add(data("data_2", data_2)); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - topology.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true)); + topology.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, {}, 0, true)); topology.add(reorder("reorder0", input_info("gather0"), format::any, data_types::f32, std::vector(), reorder_mean_mode::subtract, padding(), true)); topology.add(eltwise("eltwise0", input_info("reorder0"), input_info("data_1"), eltwise_mode::prod, broadcast_spec)); topology.add(reshape("reshape0", input_info("eltwise0"), false, {}, ov::PartialShape{1}, reshape::reshape_mode::unsqueeze)); - topology.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true)); + topology.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, {}, 0, true)); topology.add(reorder("reorder1", input_info("gather1"), format::any, data_types::f32, std::vector(), reorder_mean_mode::subtract, padding(), true)); topology.add(eltwise("eltwise1", input_info("reorder1"), input_info("data_1"), eltwise_mode::prod, broadcast_spec)); diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp index a6efbbc98a5de3..4406605784a22f 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp @@ -136,8 +136,8 @@ TEST(reorder_inputs, mixed_ranks_gather) { ov::CoordinateDiff{0, 0}, false)); topology.add(border("pad", { input_info("conv") }, 0, ov::CoordinateDiff{0, 0, 1, 1}, ov::CoordinateDiff{0, 0, 1, 1})); - topology.add(gather("gather1", input_info("pad"), input_info("data1"), 2, { 1, 2, 3, 128, 57 }, 0, false)); - topology.add(gather("gather2", input_info("gather1"), input_info("data2"), 4, { 1, 2, 3, 128, 3, 55 }, 0, false)); + topology.add(gather("gather1", input_info("pad"), input_info("data1"), 2, 4, { 1, 2, 3, 128, 57 }, 0, false)); + topology.add(gather("gather2", input_info("gather1"), input_info("data2"), 4, 5, { 1, 2, 3, 128, 3, 55 }, 0, false)); topology.add(permute("permute", input_info("gather2"), {0, 1, 2, 4, 3, 5})); ExecutionConfig config = get_test_default_config(engine); @@ -155,10 +155,10 @@ TEST(reorder_inputs, mixed_ranks_gather) { auto& gather1_node = prog_impl->get_node("gather1"); auto& gather2_node = prog_impl->get_node("gather2"); - ASSERT_EQ(gather1_node.get_input_layouts()[0].format, format::bfzyx); + ASSERT_EQ(gather1_node.get_input_layouts()[0].format, format::bfyx); ASSERT_EQ(gather1_node.get_output_layout().format, format::bfzyx); - ASSERT_EQ(gather2_node.get_input_layouts()[0].format, format::bfwzyx); + ASSERT_EQ(gather2_node.get_input_layouts()[0].format, format::bfzyx); ASSERT_EQ(gather2_node.get_output_layout().format, format::bfwzyx); } diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/gather_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/gather_si_test.cpp index aedcfb9d4dce5c..a1852c0c4561a5 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/gather_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/gather_si_test.cpp @@ -37,7 +37,7 @@ TEST_P(gather_test, shape_infer) { auto input0_layout_prim = std::make_shared("input0", p.in0_layout); auto input1_layout_prim = std::make_shared("input1", p.in1_layout); - auto gather_prim = std::make_shared("output", input_info("input0"), input_info("input1"), p.axis, ov::Shape{}, p.batch_dim); + auto gather_prim = std::make_shared("output", input_info("input0"), input_info("input1"), p.axis, 0, ov::Shape{}, p.batch_dim); cldnn::program prog(engine); diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp index 6597351a0c728c..f41cafc9e2d4f9 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/random_uniform_si_test.cpp @@ -67,19 +67,19 @@ TEST_P(random_uniform_si_test, shape_infer) { set_values(allocated_mem, {ov::float16(val).to_bits()}); break; case data_types::f32: - set_values(allocated_mem, {static_cast::type>(val)}); + set_values(allocated_mem, {static_cast::value_type>(val)}); break; case data_types::i32: - set_values(allocated_mem, {static_cast::type>(val)}); + set_values(allocated_mem, {static_cast::value_type>(val)}); break; case data_types::i64: - set_values(allocated_mem, {static_cast::type>(val)}); + set_values(allocated_mem, {static_cast::value_type>(val)}); break; case data_types::i8: - set_values(allocated_mem, {static_cast::type>(val)}); + set_values(allocated_mem, {static_cast::value_type>(val)}); break; case data_types::u8: - set_values(allocated_mem, {static_cast::type>(val)}); + set_values(allocated_mem, {static_cast::value_type>(val)}); break; default: break; diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/range_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/range_si_test.cpp index 2430d628aa2c42..b079017d5c12e0 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/range_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/range_si_test.cpp @@ -66,19 +66,19 @@ TEST_P(range_si_test, shape_infer) { set_values(prim_mem, {ov::float16(p.vals[idx]).to_bits()}); break; case data_types::f32: - set_values(prim_mem, {static_cast::type>(p.vals[idx])}); + set_values(prim_mem, {static_cast::value_type>(p.vals[idx])}); break; case data_types::i32: - set_values(prim_mem, {static_cast::type>(p.vals[idx])}); + set_values(prim_mem, {static_cast::value_type>(p.vals[idx])}); break; case data_types::i64: - set_values(prim_mem, {static_cast::type>(p.vals[idx])}); + set_values(prim_mem, {static_cast::value_type>(p.vals[idx])}); break; case data_types::i8: - set_values(prim_mem, {static_cast::type>(p.vals[idx])}); + set_values(prim_mem, {static_cast::value_type>(p.vals[idx])}); break; case data_types::u8: - set_values(prim_mem, {static_cast::type>(p.vals[idx])}); + set_values(prim_mem, {static_cast::value_type>(p.vals[idx])}); break; default: break; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/canonicalization_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/canonicalization_gpu_test.cpp index 400152887e6ce9..933161185c78a8 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/canonicalization_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/canonicalization_gpu_test.cpp @@ -220,7 +220,7 @@ TEST(canonicalization, gather) { topology.add(input_layout("data", data_layout)); topology.add(input_layout("indices", indices_layout)); topology.add(gather("gather", input_info("data"), input_info("indices"), params.second.axis, - ov::Shape{}, params.second.batch_dim, params.second.support_neg_ind)); + 0, ov::Shape{}, params.second.batch_dim, params.second.support_neg_ind)); canonicalization_test(topology, "gather", std::get<1>(params.first), std::get<2>(params.first)); } @@ -254,9 +254,9 @@ TEST(canonicalization, fusing_gather_eltwise) { topology.add(input_layout("indices_second", indices_layout_second)); topology.add(input_layout("data", input_mul_layout)); topology.add(gather("gather_first", input_info("input"), input_info("indices_first"), shapes.second.axis, - shapes.second.out_shape, shapes.second.batch_dim, shapes.second.support_neg_ind)); + shapes.second.data_shape.rank().get_length(), shapes.second.out_shape, shapes.second.batch_dim, shapes.second.support_neg_ind)); topology.add(gather("gather_second", input_info("input"), input_info("indices_second"), shapes.second.axis, - shapes.second.out_shape, shapes.second.batch_dim, shapes.second.support_neg_ind)); + shapes.second.data_shape.rank().get_length(), shapes.second.out_shape, shapes.second.batch_dim, shapes.second.support_neg_ind)); topology.add(eltwise("mul", {input_info("gather_first"), input_info("data")}, eltwise_mode::prod)); topology.add(eltwise("add", {input_info("gather_second"), input_info("mul")}, eltwise_mode::sum)); topology.add(reorder("out_reorder", input_info("add"), format::bfyx, data_types::f32)); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp index b42241b23f1e99..fda7c1c41e5c12 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp @@ -28,6 +28,7 @@ bool is_output_equal(const cldnn::memory::ptr mem, const std::vector& ref) return true; } + topology generate_simple_branch (bool branch_true_false, const primitive_id& id, const primitive_id& input_id, const data_types dt = data_types::f32) { topology branch; @@ -86,14 +87,14 @@ class condition_gpu_basic_test : public ::testing::Test { condition::branch branch_true; { cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, dat_dt); - branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true); + branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({input_id, branch_input_id}); branch_true.output_map.insert({0, "condi_when_true"}); } condition::branch branch_false; { cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, dat_dt); - branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true); + branch_false.inner_program = program::build_program(engine, branch_false_topology, config, false, false, true); branch_false.input_map.insert({input_id, branch_input_id}); branch_false.output_map.insert({0, "condi_when_false"}); } @@ -178,14 +179,14 @@ TEST(condition_gpu, basic_range_equal_comp) { condition::branch branch_true; { cldnn::topology branch_true_topology = generate_simple_branch(true, condi_id, branch_input_id); - branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true); + branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({concat_id, branch_input_id}); branch_true.output_map.insert({0, "condi_when_true"}); } condition::branch branch_false; { cldnn::topology branch_false_topology = generate_simple_branch(false, condi_id, branch_input_id); - branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true); + branch_false.inner_program = program::build_program(engine, branch_false_topology, config, false, false, true); branch_false.input_map.insert({concat_id, branch_input_id}); branch_false.output_map.insert({0, "condi_when_false"}); } @@ -258,8 +259,8 @@ TEST(condition_gpu, basic_stacked_ifs) { auto predicate2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); primitive_id input_id = "input"; - primitive_id pred_id = "predicate"; - primitive_id predicate2_id = "predicate2"; + primitive_id pred_id = "predicate"; + primitive_id predicate2_id = "predicate2"; primitive_id branch_input_id = "branch_input"; primitive_id cond_id = "condi"; primitive_id cond2_id = "condi2"; @@ -280,22 +281,22 @@ TEST(condition_gpu, basic_stacked_ifs) { ); condition::branch branch_condi_1_true; - branch_condi_1_true.inner_program = program::build_program(engine, condi_1_true, config, true); + branch_condi_1_true.inner_program = program::build_program(engine, condi_1_true, config, false, false, true); branch_condi_1_true.input_map.insert({input_id, branch_input_id}); branch_condi_1_true.output_map.insert({0, "condi_when_true"}); condition::branch branch_condi_1_false; - branch_condi_1_false.inner_program = program::build_program(engine, condi_1_false, config, true); + branch_condi_1_false.inner_program = program::build_program(engine, condi_1_false, config, false, false, true); branch_condi_1_false.input_map.insert({input_id, branch_input_id}); branch_condi_1_false.output_map.insert({0, "condi_when_false"}); condition::branch branch_condi_2_true; - branch_condi_2_true.inner_program = program::build_program(engine, condi_2_true, config, true); + branch_condi_2_true.inner_program = program::build_program(engine, condi_2_true, config, false, false, true); branch_condi_2_true.input_map.insert({cond_id, branch_input_id}); branch_condi_2_true.output_map.insert({0, "activ_when_true"}); condition::branch branch_condi_2_false; - branch_condi_2_false.inner_program = program::build_program(engine, condi_2_false, config, true); + branch_condi_2_false.inner_program = program::build_program(engine, condi_2_false, config, false, false, true); branch_condi_2_false.input_map.insert({cond_id, branch_input_id}); branch_condi_2_false.output_map.insert({0, "activ_when_false"}); @@ -373,7 +374,7 @@ TEST(condition_gpu, basic_nested_ifs) { data("scale_5_data", scale_5_mem), eltwise("scale_5", { input_info("branch_input1"), input_info("scale_5_data") }, eltwise_mode::prod) ); - nested_true.inner_program = program::build_program(engine, nested_true_topology, config, true); + nested_true.inner_program = program::build_program(engine, nested_true_topology, config, false, false, true); nested_true.input_map.insert({"pooling_when_true", "branch_input1"}); nested_true.output_map.insert({0, "scale_5"}); } @@ -385,7 +386,7 @@ TEST(condition_gpu, basic_nested_ifs) { data("scale_10_data", scale_10_mem), eltwise("scale_10", { input_info("branch_input2"), input_info("scale_10_data") }, eltwise_mode::prod) ); - nested_false.inner_program = program::build_program(engine, nested_false_topology, config, true); + nested_false.inner_program = program::build_program(engine, nested_false_topology, config, false, false, true); nested_false.input_map.insert({"pooling_when_true", "branch_input2"}); nested_false.output_map.insert({0, "scale_10"}); } @@ -399,7 +400,7 @@ TEST(condition_gpu, basic_nested_ifs) { input_layout("predicate2", predicate2->get_layout()), condition( "condi_nested", {input_info("predicate2"), input_info("pooling_when_true")}, nested_true, nested_false) ); - branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true); + branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({"input", "branch_input3"}); branch_true.output_map.insert({0, "condi_nested"}); } @@ -411,7 +412,7 @@ TEST(condition_gpu, basic_nested_ifs) { input_layout("branch_input4", { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }), pooling("pooling_when_false", input_info("branch_input4"), cldnn::pooling_mode::average, { 1, 2 }, { 1, 2 }) ); - branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true); + branch_false.inner_program = program::build_program(engine, branch_false_topology, config, false, false, true); branch_false.input_map.insert({"input", "branch_input4"}); branch_false.output_map.insert({0, "pooling_when_false"}); } @@ -460,21 +461,21 @@ TEST(condition_gpu, negative_predicate_wrong_layout) { auto predicate = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } }); primitive_id input_id = "input"; - primitive_id pred_id = "predicate"; + primitive_id pred_id = "predicate"; primitive_id branch_input_id = "branch_input"; primitive_id cond_id = "condi"; condition::branch branch_true; { cldnn::topology branch_true_topology = generate_simple_branch(true, cond_id, branch_input_id, data_types::f32); - branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true); + branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({input_id, branch_input_id}); branch_true.output_map.insert({0, "condi_when_true"}); } condition::branch branch_false; { cldnn::topology branch_false_topology = generate_simple_branch(false, cond_id, branch_input_id, data_types::f32); - branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true); + branch_false.inner_program = program::build_program(engine, branch_false_topology, config, false, false, true); branch_false.input_map.insert({input_id, branch_input_id}); branch_false.output_map.insert({0, "condi_when_false"}); } @@ -501,7 +502,7 @@ TEST(condition_gpu, negative_not_same_layouts) { auto predicate = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } }); primitive_id input_id = "input"; - primitive_id pred_id = "predicate"; + primitive_id pred_id = "predicate"; primitive_id branch_input_id = "branch_input"; primitive_id cond_id = "condi"; @@ -513,7 +514,7 @@ TEST(condition_gpu, negative_not_same_layouts) { input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }), pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 2 }, { 1, 2 }) ); - branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true); + branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({input_id, branch_input_id}); branch_true.output_map.insert({0, pool_id}); } @@ -526,7 +527,7 @@ TEST(condition_gpu, negative_not_same_layouts) { input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }), pooling(pool_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 1, 4 }, { 1, 4 }) ); - branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true); + branch_false.inner_program = program::build_program(engine, branch_false_topology, config, false, false, true); branch_false.input_map.insert({input_id, branch_input_id}); branch_false.output_map.insert({0, pool_id}); } @@ -566,7 +567,7 @@ TEST(condition_gpu, negative_same_names_within_different_networks) { input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }), pooling(duplicated_id, input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 }) ); - branch_true.inner_program = program::build_program(engine, branch_true_topology, config, true); + branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({input_id, branch_input_id}); branch_true.output_map.insert({0, duplicated_id}); } @@ -578,7 +579,7 @@ TEST(condition_gpu, negative_same_names_within_different_networks) { input_layout(branch_input_id, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }), pooling("pooling_when_false", input_info(branch_input_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 }) ); - branch_false.inner_program = program::build_program(engine, branch_false_topology, config, true); + branch_false.inner_program = program::build_program(engine, branch_false_topology, config, false, false, true); branch_false.input_map.insert({input_id, branch_input_id}); branch_false.output_map.insert({0, "pooling_when_false"}); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp index c1309b720daf16..5da00e8fb739f3 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp @@ -2482,7 +2482,7 @@ TEST(eltwise_gpu_int, div_gather_fusing) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add(input_layout("Input3", input3->get_layout())); - topology.add(gather("gather", input_info("InputDictionary"), input_info("InputText"), 0, ov::Shape{2, 2, 2, 2})); + topology.add(gather("gather", input_info("InputDictionary"), input_info("InputText"), 0, 4, ov::Shape{2, 2, 2, 2})); topology.add(reorder("gather_reorder", input_info("gather"), { data_types::i32, format::bfyx, { 2, 2, 2, 2 } })); topology.add(eltwise("eltwise", { input_info("gather_reorder"), input_info("Input3") }, diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 71301447bb28b9..dc23440c48af67 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -663,21 +663,22 @@ TEST(fully_connected_gpu, compressed_scale_zp_bias) { auto& engine = get_test_engine(); auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); - auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); - auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); - auto zp_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); + auto zp_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, 0.5f, -2.0f, -0.5f, -1.0f }); - set_values(weights_mem, { 1.5f, 1.0f, 0.5f, -1.0f, - 0.0f, 0.5f, 0.5f, -0.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - -1.5f, -1.0f, -0.5f, 1.0f, - 0.0f, -0.5f, 0.5f, 0.5f }); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); + set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f }); set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f }); @@ -709,8 +710,7 @@ TEST(fully_connected_gpu, compressed_scale_zp_bias) { ov::PartialShape expected_shape{1, 2, 8}; ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); - std::vector expected_result = {-4.0f, -23.0f, 11.0f, 0.0f, -2.0f, -3.5f, -30.0f, -10.5f, - 6.0f, 19.0f, -5.0f, -8.0f, 12.0f, -8.5f, 44.0f, 14.5f}; + std::vector expected_result = {13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f, }; for (size_t i = 0; i < expected_result.size(); i++) { ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; @@ -721,20 +721,20 @@ TEST(fully_connected_gpu, compressed_scale_bias) { auto& engine = get_test_engine(); auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); - auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, 0.5f, -2.0f, -0.5f, -1.0f }); - set_values(weights_mem, { 1.5f, 1.0f, 0.5f, -1.0f, - 0.0f, 0.5f, 0.5f, -0.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - -1.5f, -1.0f, -0.5f, 1.0f, - 0.0f, -0.5f, 0.5f, 0.5f }); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }); set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 1.0f }); @@ -764,8 +764,7 @@ TEST(fully_connected_gpu, compressed_scale_bias) { ov::PartialShape expected_shape{1, 2, 8}; ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); - std::vector expected_result = {2.0f, 1.0f, -1.0f, -12.0f, 4.0f, -5.0f, 6.0f, -8.25f, - 0.0f, -5.0f, 7.0f, 4.0f, 6.0f, -7.0f, 8.0f, -7.75f}; + std::vector expected_result = {19.f, 40.f, 69.f, 54.f, 83.f, 48.f, 37.f, -2.f, -17.f, -44.f, -63.f, -62.f, -73.f, -60.f, -23.f, -14.f }; for (size_t i = 0; i < expected_result.size(); i++) { ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; @@ -776,19 +775,19 @@ TEST(fully_connected_gpu, compressed_scale_fp16) { auto& engine = get_test_engine(); auto input_mem = engine.allocate_memory({ { 2, 4}, data_types::f16, format::bfyx }); - auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f16, format::bfyx }); - auto scale_mem = engine.allocate_memory({ {1, 8}, data_types::f16, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f16, format::bfyx }); set_values(input_mem, { ov::float16(-0.5f), ov::float16(2.0f), ov::float16(0.5f), ov::float16(1.0f), ov::float16(0.5f), ov::float16(-2.0f), ov::float16(-0.5f), ov::float16(-1.0f) }); - set_values(weights_mem, {ov::float16( 1.5f), ov::float16( 1.0f), ov::float16( 0.5f), ov::float16(-1.0f), - ov::float16( 0.0f), ov::float16( 0.5f), ov::float16( 0.5f), ov::float16(-0.5f), - ov::float16(-2.0f), ov::float16(-0.5f), ov::float16( 1.0f), ov::float16( 1.5f), - ov::float16(-2.0f), ov::float16(-0.5f), ov::float16( 1.0f), ov::float16( 1.5f), - ov::float16( 2.0f), ov::float16( 0.5f), ov::float16(-1.0f), ov::float16(-1.5f), - ov::float16( 2.0f), ov::float16( 0.5f), ov::float16(-1.0f), ov::float16(-1.5f), - ov::float16(-1.5f), ov::float16(-1.0f), ov::float16(-0.5f), ov::float16( 1.0f), - ov::float16( 0.0f), ov::float16(-0.5f), ov::float16(0.5f), ov::float16( 0.5f) }); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); set_values(scale_mem, {ov::float16(2.0f), ov::float16(4.0f), ov::float16(-2.0f), ov::float16(-4.0f), ov::float16(0.5f), ov::float16(-0.5f), ov::float16(2.0f), ov::float16(2.0f)}); @@ -817,8 +816,8 @@ TEST(fully_connected_gpu, compressed_scale_fp16) { ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); std::vector expected_result = { - ov::float16(1.0f), ov::float16( 3.0f), ov::float16(-4.0f), ov::float16(-8.0f), ov::float16(-1.0f), ov::float16( 1.0f), ov::float16(-1.0f), ov::float16(-0.5f), - ov::float16(-1.0f), ov::float16(-3.0f), ov::float16( 4.0f), ov::float16( 8.0f), ov::float16( 1.0f), ov::float16(-1.0f), ov::float16( 1.0f), ov::float16( 0.5f)}; + ov::float16(18), ov::float16(84), ov::float16(-66), ov::float16(-116), ov::float16(19.5), ov::float16(-13.5), ov::float16(30), ov::float16(6), + ov::float16(-18), ov::float16(-84), ov::float16(66), ov::float16(116), ov::float16(-19.5), ov::float16(13.5), ov::float16(-30), ov::float16(-6) }; for (size_t i = 0; i < expected_result.size(); i++) { ASSERT_FLOAT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gather_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gather_gpu_test.cpp index bbef9e78912d5b..75465b89ae51ff 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gather_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gather_gpu_test.cpp @@ -88,6 +88,7 @@ class gather8_test : public ::testing::TestWithParam { input_info("reorder0"), input_info("reorder1"), axis, + shape_in[0].size(), ov::Shape(shape_out.begin(), shape_out.end()), batch_dim, true)); @@ -108,7 +109,7 @@ class gather8_test : public ::testing::TestWithParam { planar_topo.add(input_layout("input0", input0->get_layout())); planar_topo.add(input_layout("input1", input1->get_layout())); planar_topo.add( - gather("gather", input_info("input0"), input_info("input1"), axis, ov::Shape(shape_out.begin(), shape_out.end()), batch_dim, true)); + gather("gather", input_info("input0"), input_info("input1"), axis, shape_in[0].size(), ov::Shape(shape_out.begin(), shape_out.end()), batch_dim, true)); network planar_network(engine, planar_topo, get_test_default_config(engine)); planar_network.set_input_data("input0", input0); planar_network.set_input_data("input1", input1); @@ -408,7 +409,7 @@ TEST(gather8_gpu_fp16, d323_axisY_bdim_m1) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 5, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) ); network network(engine, topology, get_test_default_config(engine)); @@ -515,7 +516,7 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2, 2, 2}, batch_dim) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 6, ov::Shape{2, 2, 2, 2, 2, 2}, batch_dim) ); network network(engine, topology, get_test_default_config(engine)); @@ -626,7 +627,7 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 5, ov::Shape{3, 2, 3, 3, 2}, batch_dim) ); network network(engine, topology, get_test_default_config(engine)); @@ -730,7 +731,7 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{4, 3, 4, 1, 1, 1}, batch_dim) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{4, 3, 4, 1, 1, 1}, batch_dim) ); network network(engine, topology, get_test_default_config(engine)); @@ -805,7 +806,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 1, 1}, batch_dim) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{3, 2, 1, 1}, batch_dim) ); network network(engine, topology, get_test_default_config(engine)); @@ -868,7 +869,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 1, 1}, batch_dim) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{3, 2, 1, 1}, batch_dim) ); network network(engine, topology, get_test_default_config(engine)); @@ -930,7 +931,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 3, 2, 1}, batch_dim) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{3, 3, 2, 1}, batch_dim) ); network network(engine, topology, get_test_default_config(engine)); @@ -998,7 +999,7 @@ TEST(gather_gpu_fp16, d14_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{1, 4, 2, 1}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1060,7 +1061,7 @@ TEST(gather_gpu_fp16, d222_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1121,7 +1122,7 @@ TEST(gather_gpu_fp16, d22_axisY) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1182,7 +1183,7 @@ TEST(gather_gpu_fp16, d22_axisF) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1240,7 +1241,7 @@ TEST(gather_gpu_fp32, d14_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{1, 4, 2, 1}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1301,7 +1302,7 @@ TEST(gather_gpu_fp32, d222_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1362,7 +1363,7 @@ TEST(gather_gpu_fp32, d22_axisY) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1423,7 +1424,7 @@ TEST(gather_gpu_fp32, d22_axisF) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1484,7 +1485,7 @@ TEST(gather_gpu_int32, d22_axisF) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1542,7 +1543,7 @@ TEST(gather_gpu_int32, d14_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{1, 4, 2, 1}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1603,7 +1604,7 @@ TEST(gather_gpu_int32, d222_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1664,7 +1665,7 @@ TEST(gather_gpu_int32, d22_axisY) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 2, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1728,7 +1729,7 @@ TEST(gather_gpu_fp32, d41_axisB) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{4, 1, 2, 3}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{4, 1, 2, 3}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1791,7 +1792,7 @@ TEST(gather_gpu_fp32, d41_axisF) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 4, 1, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 4, 1, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1850,7 +1851,7 @@ TEST(gather_gpu_fp32, d2_axisX) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 1, 2}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{2, 2, 1, 2}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1900,7 +1901,7 @@ TEST(gather_gpu_fp32, 322_axisF) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{3, 2, 2, 1}) ); network network(engine, topology, get_test_default_config(engine)); @@ -1940,7 +1941,7 @@ TEST(gather_gpu_fp32, dynamic_322_axisF) { topology topology; topology.add(input_layout("input1", in1_layout)); topology.add(input_layout("input2", in2_layout)); - topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, ov::Shape{})); + topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, 0, ov::Shape{})); ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); @@ -1983,7 +1984,7 @@ TEST(gather_gpu_fp32, indice_out_of_bound) { topology topology; topology.add(input_layout("input1", in1_layout)); topology.add(input_layout("input2", in2_layout)); - topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, ov::Shape{}, 0, true)); + topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, 0, ov::Shape{}, 0, true)); ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); @@ -2021,7 +2022,7 @@ TEST(gather_cpu_impl_fp32, dynamic_322_axisF) { topology topology; topology.add(input_layout("input1", in1_layout)); topology.add(input_layout("input2", in2_layout)); - topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, ov::Shape{})); + topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, 0, ov::Shape{})); auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); @@ -2071,7 +2072,7 @@ void test_gather_gpu_u8_322_axisF(bool is_caching_test) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1})); + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{3, 2, 2, 1})); cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); @@ -2121,7 +2122,7 @@ TEST(gather_single_axis, simple_Baxis) { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 2, 2, 1}) + gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, 4, ov::Shape{1, 2, 2, 1}) ); topology.add(reorder("reorder", input_info("gather"), format::bfyx, data_types::i8)); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp new file mode 100644 index 00000000000000..a13c1d1550882f --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp @@ -0,0 +1,146 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" +#include "random_generator.hpp" +#include +#include +#include "openvino/reference/group_normalization.hpp" +#include "compilation_context.hpp" + + +using namespace cldnn; +using namespace ::tests; + +namespace { + +typedef std::tuple< +std::vector, // Input shape +std::size_t, // Number of groups +double, // Epsilon +format // First input layout +> +GroupNormalizationParams; + +class GroupNormalizationGPUTest : public ::testing::TestWithParam { +public: + GroupNormalizationGPUTest() = default; + + void SetUp() override { + std::vector input_shape; + const auto& params = GetParam(); + std::tie(input_shape, num_groups_, epsilon_, format_) = params; + std::copy(std::begin(input_shape), std::end(input_shape), std::back_inserter(data_shape_)); + tests::random_generator rg{"GroupNormalizationGPUTest"}; + data_ = rg.generate_random_1d(ov::shape_size(input_shape), -1, 1); + scale_ = rg.generate_random_1d(input_shape[1], -1, 1); + bias_ = rg.generate_random_1d(input_shape[1], -1, 1); + const auto planar_format = format::dimension(format_) == 4 ? format::bfyx : format::bfzyx; + + topology tp; + auto &engine = get_test_engine(); + data_layout_ = layout{data_types::f32, planar_format, tensor{input_shape}}; + scale_bias_layout_ = layout{data_types::f32, planar_format, tensor{1, + static_cast(scale_.size()), 1, 1}}; + + primitive_id reordered_data_primitive = data_primitive_ + "_reordered"; + tp.add(input_layout{data_primitive_, data_layout_}); + tp.add(input_layout{scale_primitive_, scale_bias_layout_}); + tp.add(input_layout{bias_primitive_, scale_bias_layout_}); + tp.add(reorder{reordered_data_primitive, data_primitive_, format_, data_types::f32}); + + auto g = group_normalization{ + "group_normalization_output", + input_info{reordered_data_primitive}, + input_info{scale_primitive_}, + input_info{bias_primitive_}, + static_cast(num_groups_), + epsilon_ + }; + tp.add(g); + tp.add(reorder{"output", input_info("group_normalization_output"), planar_format, data_types::f32}); + + network_ = std::make_shared(engine, tp, get_test_default_config(engine)); + } + + void Test() { + auto &engine = get_test_engine(); + auto data_gpu_mem = engine.allocate_memory(data_layout_); + auto scale_gpu_mem = engine.allocate_memory(scale_bias_layout_); + auto bias_gpu_mem = engine.allocate_memory(scale_bias_layout_); + set_values(data_gpu_mem, data_); + set_values(scale_gpu_mem, scale_); + set_values(bias_gpu_mem, bias_); + network_->set_input_data(data_primitive_, data_gpu_mem); + network_->set_input_data(scale_primitive_, scale_gpu_mem); + network_->set_input_data(bias_primitive_, bias_gpu_mem); + auto outputs = network_->execute(); + auto output = outputs.at("output").get_memory(); + cldnn::mem_lock output_gpu_mem(output, get_test_stream()); + + std::vector reference_output(data_.size()); + ov::reference::group_normalization(data_.data(), scale_.data(), bias_.data(), reference_output.data(), + ov::Shape{data_shape_}, num_groups_, epsilon_); + + ASSERT_EQ(output_gpu_mem.size(), reference_output.size()); + for (std::size_t i = 0; i < reference_output.size(); i++) { + ASSERT_NEAR(output_gpu_mem[i], reference_output[i], 0.0001); + } + } + +private: + std::vector data_{}; + std::vector scale_{}; + std::vector bias_{}; + std::size_t num_groups_{}; + double epsilon_{}; + format format_{format::any}; + network::ptr network_{}; + layout data_layout_{}; + layout scale_bias_layout_{}; + std::vector data_shape_; + static const primitive_id data_primitive_; + static const primitive_id scale_primitive_; + static const primitive_id bias_primitive_; +}; + +const primitive_id GroupNormalizationGPUTest::data_primitive_{"data"}; +const primitive_id GroupNormalizationGPUTest::scale_primitive_{"scale"}; +const primitive_id GroupNormalizationGPUTest::bias_primitive_{"bias"}; + +TEST_P(GroupNormalizationGPUTest, blocked_layouts_support) { + Test(); +} + +const std::vector f_blocked_4d_formats { + format::b_fs_yx_fsv2, + format::b_fs_yx_fsv4, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, +}; + +const std::vector f_blocked_5d_formats { + format::b_fs_zyx_fsv2, + format::b_fs_zyx_fsv4, + format::b_fs_zyx_fsv16, + format::b_fs_zyx_fsv32, +}; + +INSTANTIATE_TEST_SUITE_P( + GroupNormalizationGPUTest_blocked_layouts_support_4d, GroupNormalizationGPUTest, + ::testing::Combine( + ::testing::Values(std::vector{3, 64, 32, 64}), + ::testing::Values(4), + ::testing::Values(0.0025), + ::testing::ValuesIn(f_blocked_4d_formats))); + +INSTANTIATE_TEST_SUITE_P( + GroupNormalizationGPUTest_blocked_layouts_support_5d, GroupNormalizationGPUTest, + ::testing::Combine( + ::testing::Values(std::vector{3, 64, 28, 32, 12}), + ::testing::Values(4), + ::testing::Values(0.0025), + ::testing::ValuesIn(f_blocked_5d_formats))); + +} // anonymous namespace diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp index a9c1e1262f3aff..68cfc54237737b 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp @@ -94,7 +94,7 @@ class check_hash_value: public ::testing::Test { topology.add(input_layout("InputDictionary", input1->get_layout())); topology.add(input_layout("InputText", input2->get_layout())); topology.add( - gather(key_prim_id, input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) + gather(key_prim_id, input_info("InputDictionary"), input_info("InputText"), axis, 5, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) ); cldnn::network::ptr net = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index 94fb17104275e0..240f0df9980e97 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,34 @@ using namespace cldnn; using namespace tests; using namespace testing; +static program::ptr build_program(engine& engine, + topology& body_topology, + primitive_id execution_condition_id, + std::vector output_primitive_maps, + std::vector back_edges) { + std::vector output_names_vec; + for (auto out_map : output_primitive_maps) { + output_names_vec.push_back(out_map.internal_id.pid); + } + + // setup outputs for backedges + for (auto& back_edge : back_edges) { + output_names_vec.push_back(back_edge.from); + } + + // if execution_condition_id is specified, we need to add the id in build_option::outputs + if (!execution_condition_id.empty()) { + output_names_vec.push_back(execution_condition_id); + } + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); + config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + + return program::build_program(engine, body_topology, config, false, false, true); +} + template void test_loop_gpu_basic_no_concat(bool is_caching_test) { @@ -52,23 +81,23 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test) set_values(initial_condition_mem, {initial_condition}); topology body( + input_layout("input", input_mem->get_layout()), data("eltwise_operand", operand_mem), eltwise("eltwise", input_info("input"), input_info("eltwise_operand"), eltwise_mode::sum) ); std::vector input_primitive_maps { loop::io_primitive_map("input", "input") }; std::vector output_primitive_maps { loop::io_primitive_map("loop", "eltwise") }; + std::vector back_edges { loop::backedge_mapping("eltwise", "input") }; - std::vector back_edges { - loop::backedge_mapping("eltwise", "input") - }; + auto body_program = build_program(engine, body, "", output_primitive_maps, back_edges); topology topology( input_layout("input", input_mem->get_layout()), input_layout("trip_count", trip_count_mem->get_layout()), input_layout("initial_condition", initial_condition_mem->get_layout()), mutable_data("num_iteration", num_iteration_mem), - loop("loop", { input_info("input") }, body, + loop("loop", { input_info("num_iteration"), input_info("input") }, body_program, "trip_count", "initial_condition", "num_iteration", input_primitive_maps, output_primitive_maps, back_edges, 8) ); @@ -161,15 +190,16 @@ void test_loop_gpu_basic_concat(bool is_caching_test) std::vector input_primitive_maps { loop::io_primitive_map("input", "input", 2) }; std::vector output_primitive_maps { loop::io_primitive_map("loop", "eltwise", 2) }; - std::vector back_edges {}; + auto body_program = build_program(engine, body, "", output_primitive_maps, back_edges); + topology topology( input_layout("input", input_mem->get_layout()), input_layout("trip_count", trip_count_mem->get_layout()), input_layout("initial_condition", initial_condition_mem->get_layout()), mutable_data("num_iteration", num_iteration_mem), - loop("loop", { input_info("input") }, body, + loop("loop", { input_info("num_iteration"), input_info("input") }, body_program, "trip_count", "initial_condition", "num_iteration", input_primitive_maps, output_primitive_maps, back_edges, trip_count) ); @@ -274,6 +304,8 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) std::vector inner_output_primitive_maps { loop::io_primitive_map("inner_loop", "inner_eltwise", 2) }; std::vector inner_back_edges {}; + auto inner_body_program = build_program(engine, inner_loop_body, "", inner_output_primitive_maps, inner_back_edges); + ///////////////////////////////// // set outer loop body ///////////////////////////////// @@ -282,8 +314,8 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) input_layout("trip_count", inner_trip_count_mem->get_layout()), input_layout("initial_condition", inner_initial_condition_mem->get_layout()), mutable_data("inner_num_iteration", inner_num_iteration_mem), - loop("inner_loop", { input_info("inner_input"), input_info("trip_count"), input_info("initial_condition") }, - inner_loop_body, "trip_count", "initial_condition", "inner_num_iteration", + loop("inner_loop", { input_info("inner_num_iteration"), input_info("inner_input"), input_info("trip_count"), input_info("initial_condition") }, + inner_body_program, "trip_count", "initial_condition", "inner_num_iteration", inner_input_primitive_maps, inner_output_primitive_maps, inner_back_edges, inner_trip_count) ); std::vector outer_input_primitive_maps { @@ -296,6 +328,8 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) }; std::vector outer_back_edges { {"inner_loop", "inner_input"} }; + auto outer_body_program = build_program(engine, outer_loop_body, "", outer_output_primitive_maps, outer_back_edges); + ///////////////////////////////// // set main topology ///////////////////////////////// @@ -306,8 +340,8 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) mutable_data("num_iteration", num_iteration_mem), input_layout("inner_trip_count", inner_trip_count_mem->get_layout()), input_layout("inner_initial_condition", inner_initial_condition_mem->get_layout()), - loop("loop", { input_info("input"), input_info("inner_trip_count"), input_info("inner_initial_condition") }, - outer_loop_body, "trip_count", "initial_condition", "num_iteration", + loop("loop", { input_info("num_iteration"), input_info("input"), input_info("inner_trip_count"), input_info("inner_initial_condition") }, + outer_body_program, "trip_count", "initial_condition", "num_iteration", outer_input_primitive_maps, outer_output_primitive_maps, outer_back_edges, outer_trip_count) ); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp index 7be1609c64a9c9..ac22cc773f885a 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/type/element_type_traits.hpp" #include "test_utils.h" #include "random_generator.hpp" @@ -1924,7 +1925,7 @@ void TiledPermuteTest::run_test(const std::vector& si const std::string & permute_opt, std::vector permute_order, bool is_caching_test) { // convert ov::float16 to ov::float16 - using type_ = typename data_type_to_type::type; + using type_ = typename ov::element_type_traits::value_type; using type = typename std::conditional::value, ov::float16, type_>::type; std::vector internal_sizes(sizes); @@ -2318,7 +2319,7 @@ struct TiledPerformancePermuteTest : TiledPermuteTest { auto& engine = get_test_engine(); // convert ov::float16 to ov::float16 - using type_ = typename data_type_to_type::type; + using type_ = typename ov::element_type_traits::value_type; using type = typename std::conditional::value, ov::float16, type_>::type; std::vector internal_sizes(sizes); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/scatter_nd_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/scatter_nd_update_gpu_test.cpp index d905755e789a71..b3f68e0f00b349 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/scatter_nd_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/scatter_nd_update_gpu_test.cpp @@ -4458,6 +4458,67 @@ TEST(scatter_nd_update_gpu, dynamic) { } } + +TEST(scatter_nd_update_gpu, dynamic_padded_output) { + // Dictionary : 2x1x2x8 + // Indexes : 0x3 + // Updates : 0x8 + // Output : 2x1x2x8 + // Input values in fp32 + // + auto& engine = get_test_engine(); + + auto input1_layout = layout{ ov::PartialShape::dynamic(4), data_types::f32, format::bfyx }; + auto input2_layout = layout{ ov::PartialShape::dynamic(2), data_types::f32, format::bfyx }; + auto input3_layout = layout{ ov::PartialShape::dynamic(2), data_types::f32, format::bfyx }; + + auto input1 = engine.allocate_memory({ { 1, 1, 2, 8 }, data_types::f32, format::bfyx }); // Dictionary + auto input2 = engine.allocate_memory({ { 0, 3 }, data_types::f32, format::bfyx }); // Indexes + auto input3 = engine.allocate_memory({ { 0, 8 }, data_types::f32, format::bfyx }); // Updates + + set_values(input1, { + 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, + 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, + }); + + topology topology; + topology.add(input_layout("InputData", input1_layout)); + topology.add(input_layout("InputIndices", input2_layout)); + topology.add(input_layout("InputUpdates", input3_layout)); + topology.add( + scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2, padding({0, 0, 1, 1})) + ); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + + network.set_input_data("InputData", input1); + network.set_input_data("InputIndices", input2); + network.set_input_data("InputUpdates", input3); + + auto inst = network.get_primitive("scatter_nd_update"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network.execute(); + + auto output = outputs.at("scatter_nd_update").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + std::vector expected_results = { + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 0.f, + 0.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + }; + + for (size_t i = 0; i < expected_results.size(); ++i) { + ASSERT_EQ(expected_results[i], output_ptr[i]); + } +} + TEST(scatter_nd_update_gpu, dynamic_5d) { tests::random_generator rg(std::string(::testing::UnitTest::GetInstance()->current_test_info()->test_suite_name()) + std::string(::testing::UnitTest::GetInstance()->current_test_info()->name())); diff --git a/src/plugins/template/CMakeLists.txt b/src/plugins/template/CMakeLists.txt index 4a3691186302aa..47cbb954d63dbd 100644 --- a/src/plugins/template/CMakeLists.txt +++ b/src/plugins/template/CMakeLists.txt @@ -7,8 +7,6 @@ cmake_minimum_required(VERSION 3.13) project(OpenVINOTemplatePlugin) -set(TEMPLATE_PLUGIN_SOURCE_DIR ${OpenVINOTemplatePlugin_SOURCE_DIR}) - find_package(OpenVINODeveloperPackage REQUIRED) ov_option(ENABLE_TEMPLATE_REGISTRATION "Enables registration of TEMPLATE plugin" OFF) diff --git a/src/plugins/template/backend/CMakeLists.txt b/src/plugins/template/backend/CMakeLists.txt index 2836d0c34b6c4f..0dc03242b554af 100644 --- a/src/plugins/template/backend/CMakeLists.txt +++ b/src/plugins/template/backend/CMakeLists.txt @@ -2,12 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 # +set(TARGET_NAME openvino_interpreter_backend) + add_definitions(-DIN_OV_COMPONENT) -ov_deprecated_no_errors() -file(GLOB OPS_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cpp" - ) +file(GLOB OPS_SRC "${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cpp") set (SRC backend.cpp @@ -24,31 +23,38 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") ov_add_compiler_flags(/wd4018) endif() -add_library(interpreter_backend STATIC EXCLUDE_FROM_ALL ${OPS_SRC} ${SRC}) -add_library(openvino::interpreter_backend ALIAS interpreter_backend) +add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${OPS_SRC} ${SRC}) + +add_library(openvino::interpreter_backend ALIAS ${TARGET_NAME}) +set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME interpreter_backend) if(CMAKE_COMPILER_IS_GNUCXX) ov_add_compiler_flags(-Wno-missing-declarations) endif() -ov_build_target_faster(interpreter_backend UNITY) +ov_build_target_faster(${TARGET_NAME} UNITY) -target_compile_definitions(interpreter_backend +target_compile_definitions(${TARGET_NAME} PRIVATE SHARED_LIB_PREFIX="${CMAKE_SHARED_LIBRARY_PREFIX}" SHARED_LIB_SUFFIX="${OV_BUILD_POSTFIX}${CMAKE_SHARED_LIBRARY_SUFFIX}" ) -target_link_libraries(interpreter_backend PRIVATE openvino::builders openvino::reference openvino::util openvino::runtime::dev openvino::shape_inference) +target_link_libraries(${TARGET_NAME} PRIVATE openvino::builders openvino::reference openvino::util openvino::runtime::dev openvino::shape_inference) -target_include_directories(interpreter_backend PUBLIC $ $) +target_include_directories(${TARGET_NAME} PUBLIC $ + $ + $) file(GLOB_RECURSE all_backends_src "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp") -ov_add_clang_format_target(interpreter_backend_clang FOR_SOURCES ${all_backends_src}) +ov_add_clang_format_target(${TARGET_NAME}_clang FOR_SOURCES ${all_backends_src}) -# developer package +# install & export -openvino_developer_export_targets(COMPONENT core TARGETS interpreter_backend) +ov_install_static_lib(${TARGET_NAME} ${OV_CPACK_COMP_CORE}) -# install +ov_developer_package_export_targets(TARGET openvino::interpreter_backend) -ov_install_static_lib(interpreter_backend ${OV_CPACK_COMP_CORE}) +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION developer_package/include/${TARGET_NAME} + COMPONENT developer_package EXCLUDE_FROM_ALL + FILES_MATCHING PATTERN "*.hpp") diff --git a/src/plugins/template/src/CMakeLists.txt b/src/plugins/template/src/CMakeLists.txt index f382f90d11b450..effc4831f18947 100644 --- a/src/plugins/template/src/CMakeLists.txt +++ b/src/plugins/template/src/CMakeLists.txt @@ -26,7 +26,7 @@ ov_mark_target_as_cc(${TARGET_NAME}) target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" - "${TEMPLATE_PLUGIN_SOURCE_DIR}/include") + "${OpenVINOTemplatePlugin_SOURCE_DIR}/include") # link common OpenVINO Runtime libraries target_link_libraries(${TARGET_NAME} PRIVATE diff --git a/src/plugins/template/tests/functional/CMakeLists.txt b/src/plugins/template/tests/functional/CMakeLists.txt index eb634d4f91f955..18296710d64a28 100644 --- a/src/plugins/template/tests/functional/CMakeLists.txt +++ b/src/plugins/template/tests/functional/CMakeLists.txt @@ -18,7 +18,7 @@ ov_add_test_target( openvino::funcSharedTests openvino::runtime::dev INCLUDES - "${TEMPLATE_PLUGIN_SOURCE_DIR}/include" + "${OpenVINOTemplatePlugin_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/op_reference" ADD_CLANG_FORMAT LABELS diff --git a/src/plugins/template/tests/functional/op_reference/convert.cpp b/src/plugins/template/tests/functional/op_reference/convert.cpp index 0f3e47148790be..b6195744c9c6f3 100644 --- a/src/plugins/template/tests/functional/op_reference/convert.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert.cpp @@ -103,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -134,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -245,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -261,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -269,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -277,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -285,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -301,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -309,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -317,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -325,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -333,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -341,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -349,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -364,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -395,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -452,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -483,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -540,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -571,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -628,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -659,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -718,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -758,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -825,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -841,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -849,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -857,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -865,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -881,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -889,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -897,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -905,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -913,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -921,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -929,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -945,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -976,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1034,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1065,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1123,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1154,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1211,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1242,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, diff --git a/src/plugins/template/tests/functional/op_reference/convert_like.cpp b/src/plugins/template/tests/functional/op_reference/convert_like.cpp index b46fe98af030c2..4ddf3dda276b92 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_like.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert_like.cpp @@ -6,6 +6,8 @@ #include +#include + #include "conversion.hpp" using namespace ov; @@ -101,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -132,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -243,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -259,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -267,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -275,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -283,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -299,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -307,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -315,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -323,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -331,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -339,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -347,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -362,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -393,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -436,7 +438,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i8, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i16 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -450,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -481,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -524,7 +526,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i16, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i32 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -538,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -569,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -612,7 +614,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i32, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -626,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -657,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -700,7 +702,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i64, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination u1 @@ -716,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -756,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -823,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -839,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -847,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -855,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -863,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -879,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -887,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -895,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -903,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -911,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -919,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -927,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -943,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -974,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1017,7 +1019,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u8, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u16 @@ -1032,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1063,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1106,7 +1108,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u16, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u32 @@ -1121,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1152,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1195,7 +1197,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u32, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1209,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1240,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1283,7 +1285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u64, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3})), ReferenceConversionLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt b/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt index 6f981d0702c96d..027af903cff097 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt @@ -12,13 +12,13 @@ list(APPEND LIBRARIES ) # add subgraphs_dumpers tool -ov_add_target( +ov_add_test_target( NAME ${TARGET_NAME} TYPE EXECUTABLE ROOT ${CMAKE_CURRENT_SOURCE_DIR}/src INCLUDES PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/include + "$" LINK_LIBRARIES PRIVATE ${LIBRARIES} @@ -34,7 +34,7 @@ ov_add_target( ROOT "${CMAKE_CURRENT_SOURCE_DIR}/src" INCLUDES PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include + "$" LINK_LIBRARIES PUBLIC ${LIBRARIES} @@ -46,4 +46,4 @@ ov_add_target( ADD_CPPLINT ) -ov_build_target_faster(${TARGET_NAME} UNITY) \ No newline at end of file +ov_build_target_faster(${TARGET_NAME} UNITY) diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/CMakeLists.txt b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/CMakeLists.txt index 0f25d3218e1b92..5f9dc01189a443 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/CMakeLists.txt +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/CMakeLists.txt @@ -14,8 +14,8 @@ ov_add_target( ADD_CPPLINT INCLUDES PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${OpenVINO_SOURCE_DIR}/src/tests/functional/plugin/conformance/subgraphs_dumper_new/include/cache/meta/ + "$" + "$" LINK_LIBRARIES PUBLIC gflags diff --git a/src/tests/functional/plugin/shared/CMakeLists.txt b/src/tests/functional/plugin/shared/CMakeLists.txt index c75d2938d6badf..3715abb9bb059d 100644 --- a/src/tests/functional/plugin/shared/CMakeLists.txt +++ b/src/tests/functional/plugin/shared/CMakeLists.txt @@ -52,13 +52,11 @@ ov_add_target( ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src ADD_CPPLINT - DEVELOPER_PACKAGE - tests EXCLUDED_SOURCE_PATHS ${EXCLUDED_SOURCE_PATHS} DEFINES ${DEFINES} INCLUDES PUBLIC - ${PUBLIC_HEADERS_DIR} + "$" LINK_LIBRARIES PUBLIC openvino::pugixml @@ -86,3 +84,8 @@ if (ENABLE_INTEL_CPU) "${CMAKE_CURRENT_SOURCE_DIR}/src/behavior/ov_executable_network/get_metric.cpp" PROPERTIES COMPILE_DEFINITIONS ENABLE_INTEL_CPU=1) endif() + +# install & export + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${PUBLIC_HEADERS_DIR}/") diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/group_normalization.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/group_normalization.hpp new file mode 100644 index 00000000000000..8f080764adbced --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/group_normalization.hpp @@ -0,0 +1,21 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "shared_test_classes/single_layer/group_normalization.hpp" + +namespace ov { +namespace test { +namespace subgraph { + +TEST_P(GroupNormalizationTest, CompareWithRefs) { + run(); +} + +TEST_P(GroupNormalizationTest, CompareQueryModel) { + query_model(); +} + +} // namespace subgraph +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/einsum.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/einsum.hpp new file mode 100644 index 00000000000000..375a02f2ec65f4 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/einsum.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/einsum.hpp" + +namespace ov { +namespace test { +TEST_P(EinsumLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/eye.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/eye.hpp new file mode 100644 index 00000000000000..ebacbba67b4844 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/eye.hpp @@ -0,0 +1,14 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include "shared_test_classes/single_op/eye.hpp" + +namespace ov { +namespace test { +TEST_P(EyeLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/pooling.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/pooling.hpp new file mode 100644 index 00000000000000..495c551e9764eb --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/pooling.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/pooling.hpp" + +namespace ov { +namespace test { +TEST_P(PoolingLayerTest, Inference) { + run(); +} + +TEST_P(MaxPoolingV8LayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/roi_align.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/roi_align.hpp new file mode 100644 index 00000000000000..96e1bbeeeac639 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/roi_align.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/roi_align.hpp" + +namespace ov { +namespace test { +TEST_P(ROIAlignLayerTest, Inference) { + run(); +} + +TEST_P(ROIAlignV9LayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/roi_pooling.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/roi_pooling.hpp new file mode 100644 index 00000000000000..3b89b91aa15504 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/roi_pooling.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/roi_pooling.hpp" + +namespace ov { +namespace test { +TEST_P(ROIPoolingLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/mul_conv_fusion.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/mul_conv_fusion.hpp index 46d65560b9e1ab..529b22c56e401e 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/mul_conv_fusion.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/mul_conv_fusion.hpp @@ -6,9 +6,12 @@ #include "shared_test_classes/subgraph/mul_conv_fusion.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { TEST_P(MulConvFusion, CompareWithRefs) { - Run(); + run(); } -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/multiply_add.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/multiply_add.hpp index f3d65830592133..93f3600048a90d 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/multiply_add.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/multiply_add.hpp @@ -6,10 +6,12 @@ #include "shared_test_classes/subgraph/multiply_add.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { TEST_P(MultiplyAddLayerTest, CompareWithRefs) { - Run(); + run(); }; -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/mvn_multiply_add.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/mvn_multiply_add.hpp index 8b118b1295f140..174dffd25ca873 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/mvn_multiply_add.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/mvn_multiply_add.hpp @@ -6,10 +6,12 @@ #include "shared_test_classes/subgraph/mvn_multiply_add.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { -TEST_P(MVNMultiplyAdd, CompareWithRefs){ - Run(); +TEST_P(MVNMultiplyAdd, CompareWithRefs) { + run(); }; -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/perm_conv_perm_concat.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/perm_conv_perm_concat.hpp index b4ad568c56b462..41582d13ffd009 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/perm_conv_perm_concat.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/perm_conv_perm_concat.hpp @@ -6,10 +6,12 @@ #include "shared_test_classes/subgraph/perm_conv_perm_concat.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { TEST_P(PermConvPermConcat, CompareWithRefs) { - Run(); + run(); } -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/range_add.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/range_add.hpp index 197fe81621e5f8..ce16e5850744a5 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/range_add.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/range_add.hpp @@ -6,14 +6,16 @@ #include "shared_test_classes/subgraph/range_add.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { TEST_P(RangeAddSubgraphTest, CompareWithRefs) { - Run(); + run(); } TEST_P(RangeNumpyAddSubgraphTest, CompareWithRefs) { - Run(); + run(); } -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/relu_shape_of.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/relu_shape_of.hpp index b2391ef6c04dd8..e26e8837f80279 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/relu_shape_of.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/relu_shape_of.hpp @@ -6,10 +6,12 @@ #include "shared_test_classes/subgraph/relu_shape_of.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { TEST_P(ReluShapeOfSubgraphTest, CompareWithRefs) { - Run(); + run(); } -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/reshape_squeeze_reshape_relu.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/reshape_squeeze_reshape_relu.hpp index 331ee30f7e515d..7b1c841d7d17b7 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/reshape_squeeze_reshape_relu.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/reshape_squeeze_reshape_relu.hpp @@ -6,10 +6,12 @@ #include "shared_test_classes/subgraph/reshape_squeeze_reshape_relu.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { -TEST_P(ReshapeSqueezeReshapeRelu, CompareWithRefs){ - Run(); +TEST_P(ReshapeSqueezeReshapeRelu, CompareWithRefs) { + run(); }; -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/split_conv_concat.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/split_conv_concat.hpp index c121683977f898..5fd61dd48f738a 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/split_conv_concat.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/split_conv_concat.hpp @@ -6,14 +6,17 @@ #include "shared_test_classes/subgraph/split_conv_concat.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { TEST_P(SplitConvConcat, CompareWithRefImpl) { - Run(); + run(); }; -TEST_P(SplitConvConcat, QueryNetwork) { - QueryNetwork(); +TEST_P(SplitConvConcat, QueryModel) { + query_model(); } -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov + diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/variadic_split_pad.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/variadic_split_pad.hpp index 9b9d4fb77839ea..8f1eec6cbf6214 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/variadic_split_pad.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/variadic_split_pad.hpp @@ -6,10 +6,12 @@ #include "shared_test_classes/subgraph/variadic_split_pad.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { -TEST_P(VariadicSplitPad, CompareWithRefs){ - Run(); +TEST_P(VariadicSplitPad, CompareWithRefs) { + run(); }; -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/infer_request_dynamic.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/infer_request_dynamic.cpp index 6540664e3f4b11..e431c4d6eead2b 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/infer_request_dynamic.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/infer_request_dynamic.cpp @@ -76,6 +76,14 @@ bool OVInferRequestDynamicTests::checkOutput(const ov::runtime::Tensor& in, cons tensor.data()[i] = in.data()[i]; } req.infer(); + const auto reqShape = req.get_output_tensor(0).get_shape(); + const auto actualShape = actual.get_shape(); + if (reqShape.size() != actualShape.size()) { + return false; + } + if (!std::equal(reqShape.cbegin(), reqShape.cend(), actualShape.cbegin())) { + return false; + } for (int i = 0; i < actual.get_size(); i++) { if (fabs(req.get_output_tensor(0).data()[i] - actual.data()[i]) > std::numeric_limits::epsilon()) return false; diff --git a/src/tests/functional/shared_test_classes/CMakeLists.txt b/src/tests/functional/shared_test_classes/CMakeLists.txt index 35d09840c09770..a4f46b241437b0 100644 --- a/src/tests/functional/shared_test_classes/CMakeLists.txt +++ b/src/tests/functional/shared_test_classes/CMakeLists.txt @@ -9,11 +9,9 @@ ov_add_target( TYPE STATIC ROOT "${CMAKE_CURRENT_SOURCE_DIR}/include" ADD_CPPLINT - DEVELOPER_PACKAGE - tests INCLUDES PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/include" + "$" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -25,3 +23,8 @@ ov_add_target( ov_build_target_faster(${TARGET_NAME} PCH PRIVATE "src/precomp.hpp" ) + +# install & export + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index 05867b81d67c8c..5ca0b6531a39f3 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -20,6 +20,12 @@ using ElementType = ov::element::Type_t; using Config = ov::AnyMap; using TargetDevice = std::string; +typedef std::tuple + BasicParams; + class SubgraphBaseTest : public ov::test::TestsCommon { public: virtual void run(); diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp new file mode 100644 index 00000000000000..759f47786d98be --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp @@ -0,0 +1,98 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once +#include "ov_models/builders.hpp" +#include "common_test_utils/common_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +namespace subgraph { + +using GroupNormalizationTestParams = std::tuple; + +class GroupNormalizationTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + ElementType netType, inType, outType; + InputShape shapes; + std::int64_t num_groups; + double epsilon; + TargetDevice targetDevice; + Config config; + std::tie(netType, inType, outType, shapes, num_groups, epsilon, targetDevice, config) = obj.param; + + std::ostringstream result; + result << "NetType=" << netType << "_"; + result << "InType=" << inType << "_"; + result << "OutType=" << outType << "_"; + result << "IS=" << ov::test::utils::partialShape2str({shapes.first}) << "_"; + result << "TS="; + for (const auto& item : shapes.second) { + result << ov::test::utils::vec2str(item) << "_"; + } + result << "NumGroups=" << num_groups << "_"; + result << "Epsilon=" << epsilon << "_"; + result << "Device=" << targetDevice; + + return result.str(); + } + +protected: + void SetUp() override { + InputShape shapes; + ElementType ngPrc; + std::int64_t num_groups; + double epsilon; + + std::tie(ngPrc, inType, outType, shapes, num_groups, epsilon, targetDevice, configuration) = this->GetParam(); + InputShape biasInputShape = ExtractBiasShape(shapes); + init_input_shapes({shapes, biasInputShape, biasInputShape}); + ov::ParameterVector params; + for (auto&& shape : inputDynamicShapes) { + params.push_back(std::make_shared(ngPrc, shape)); + } + const auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + const auto groupNormalization = std::make_shared( + paramOuts.at(0), + paramOuts.at(1), + paramOuts.at(2), + num_groups, + epsilon); + const ngraph::ResultVector results{std::make_shared(groupNormalization)}; + + // TODO: This workaround is needed as there is no full support for f16 type in the reference implementation + if (ngPrc == element::Type_t::f16) { + abs_threshold = 0.007; + } + + function = std::make_shared(results, params, "GroupNormalization"); + } + + InputShape ExtractBiasShape(const InputShape& shape) { + std::vector biasShape; + std::transform(shape.second.cbegin(), shape.second.cend(), std::back_inserter(biasShape), + [](const ov::Shape& s)->ov::Shape { return {s[1]}; }); + InputShape biasInputShape { + shape.first.is_dynamic() ? ov::PartialShape{shape.first[1]} : shape.first, + std::move(biasShape) + }; + return biasInputShape; + } +}; + +} // namespace subgraph +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/einsum.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/einsum.hpp new file mode 100644 index 00000000000000..aeef17447e6a43 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/einsum.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +typedef std::tuple< + std::string, // Equation + std::vector // Input shapes +> EinsumEquationWithInput; + +typedef std::tuple< + ov::element::Type, // Model type + EinsumEquationWithInput, // Equation with corresponding input shapes + std::string // Device name +> EinsumLayerTestParamsSet; + +class EinsumLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/eye.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/eye.hpp new file mode 100644 index 00000000000000..1563a8a37f9cda --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/eye.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using EyeLayerTestParams = std::tuple< + std::vector, // eye shape + std::vector, // output batch shape + std::vector, // eye params (rows, cols, diag_shift) + ov::element::Type, // Model type + std::string>; // Device name + +class EyeLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/pooling.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/pooling.hpp new file mode 100644 index 00000000000000..cfb57994b35b33 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/pooling.hpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { +typedef std::tuple< + ov::test::utils::PoolingTypes, // Pooling type, max or avg + std::vector, // Kernel size + std::vector, // Stride + std::vector, // Pad begin + std::vector, // Pad end + ov::op::RoundingType, // Rounding type + ov::op::PadType, // Pad type + bool // Exclude pad +> poolSpecificParams; + +typedef std::tuple< + poolSpecificParams, + ov::element::Type, // Model type + std::vector, // Input shape + std::string // Device name +> poolLayerTestParamsSet; + +typedef std::tuple< + std::vector, // Kernel size + std::vector, // Stride + std::vector, // Dilation + std::vector, // Pad begin + std::vector, // Pad end + ov::element::Type, // Index element type + int64_t, // Axis + ov::op::RoundingType, // Rounding type + ov::op::PadType // Pad type +> maxPoolV8SpecificParams; + +typedef std::tuple< + maxPoolV8SpecificParams, + ov::element::Type, // Model type + std::vector, // Input shape + std::string // Device name +> maxPoolV8LayerTestParamsSet; + +class PoolingLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +class MaxPoolingV8LayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/roi_align.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/roi_align.hpp new file mode 100644 index 00000000000000..57257c7154a829 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/roi_align.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using roialignParams = std::tuple< + std::vector, // Feature map shape + ov::Shape, // Proposal coords shape + int, // Bin's row count + int, // Bin's column count + float, // Spatial scale + int, // Pooling ratio + std::string, // Pooling mode + ov::element::Type, // Model type + ov::test::TargetDevice>; // Device name + +class ROIAlignLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + static void fillCoordTensor(std::vector& coords, int height, int width, + float spatialScale, int pooledRatio, int pooledH, int pooledW); + static void fillIdxTensor(std::vector& idx, int batchSize); + +protected: + void SetUp() override; +}; + +using roialignV9Params = std::tuple< + std::vector, // Feature map shape + ov::Shape, // Proposal coords shape + int, // Bin's row count + int, // Bin's column count + float, // Spatial scale + int, // Pooling ratio + std::string, // Pooling mode + std::string, // ROI aligned mode + ov::element::Type, // Model type + ov::test::TargetDevice>; // Device name +class ROIAlignV9LayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/roi_pooling.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/roi_pooling.hpp new file mode 100644 index 00000000000000..ecd714f6ef511a --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/roi_pooling.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { +using roiPoolingParamsTuple = std::tuple< + std::vector, // Input, coords shapes + ov::Shape, // Pooled shape {pooled_h, pooled_w} + float, // Spatial scale + ov::test::utils::ROIPoolingTypes, // ROIPooling method + ov::element::Type, // Model type + ov::test::TargetDevice>; // Device name + +class ROIPoolingLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mul_conv_fusion.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mul_conv_fusion.hpp index 7486a950fffd6b..571a8903c32f75 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mul_conv_fusion.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mul_conv_fusion.hpp @@ -4,32 +4,33 @@ #pragma once -#include #include +#include #include -#include "shared_test_classes/base/layer_test_utils.hpp" -#include -#include - -namespace SubgraphTestsDefinitions { - -typedef std::tuple< - ngraph::NodeTypeInfo, // Convolution type - ngraph::Shape, // Input shape - ngraph::Shape, // Weights shape - ngraph::Shape, // Const shape - ngraph::element::Type, // Network precision - bool, // True if test is negative - std::string // Device name - > MulConvFusionParams; - -class MulConvFusion - : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +typedef std::tuple + MulConvFusionParams; + +class MulConvFusion : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo &obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiply_add.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiply_add.hpp index 1e016857d4728f..fd93d5a01e3560 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiply_add.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiply_add.hpp @@ -4,30 +4,28 @@ #pragma once -#include +#include #include +#include #include -#include -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/builders.hpp" -#include "ov_models/utils/ov_helpers.hpp" -#include "common_test_utils/test_constants.hpp" -namespace SubgraphTestsDefinitions { +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { -using MultiplyAddParamsTuple = typename std::tuple< - std::vector, //input shapes - InferenceEngine::Precision, //Network precision - std::string>; //Device name +using MultiplyAddParamsTuple = typename std::tuple; // Device name -class MultiplyAddLayerTest: - public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon{ +class MultiplyAddLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - std::shared_ptr fn; - static std::string getTestCaseName(const testing::TestParamInfo &obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mvn_multiply_add.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mvn_multiply_add.hpp index 800fc2cbb0caa1..f8218c2f04238f 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mvn_multiply_add.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/mvn_multiply_add.hpp @@ -4,31 +4,34 @@ #pragma once -#include #include +#include #include -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { -typedef std::tuple< - std::pair, // Input shape, Constant shape - InferenceEngine::Precision, // Data precision - InferenceEngine::Precision, // Axes precision - std::vector, // Axes - bool, // Normalize variance - float, // Epsilon - std::string, // Epsilon mode - std::string // Device name -> mvnMultiplyAddParams; +typedef std::tuple, // Input shape, Constant shape + ov::element::Type, // Data precision + ov::element::Type, // Axes precision + std::vector, // Axes + bool, // Normalize variance + float, // Epsilon + std::string, // Epsilon mode + std::string // Device name + > + mvnMultiplyAddParams; -class MVNMultiplyAdd: public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon{ +class MVNMultiplyAdd : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo &obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/perm_conv_perm_concat.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/perm_conv_perm_concat.hpp index b8be31f93271e7..e9b47db5482dee 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/perm_conv_perm_concat.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/perm_conv_perm_concat.hpp @@ -5,32 +5,29 @@ #pragma once #include -#include -#include -#include -#include - -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/utils/ov_helpers.hpp" -#include "ov_models/builders.hpp" - -namespace SubgraphTestsDefinitions { -typedef std::tuple< - InferenceEngine::Precision, // Network Precision - std::string, // Target Device - std::array, // Input shape - std::array, // Kernel shape - size_t, // Output channels - std::map // Configuration -> PermConvPermConcatParams; + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +typedef std::tuple + PermConvPermConcatParams; class PermConvPermConcat : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { + virtual public ov::test::SubgraphBaseStaticTest { public: static std::string getTestCaseName(const testing::TestParamInfo& obj); protected: void SetUp() override; - void Run() override; }; -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/range_add.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/range_add.hpp index 18fe4775ebbea7..6139db3244fc83 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/range_add.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/range_add.hpp @@ -4,36 +4,45 @@ #pragma once -#include +#include #include +#include #include -#include -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" -#include "shared_test_classes/single_layer/range.hpp" +namespace ov { +namespace test { -namespace SubgraphTestsDefinitions { +typedef std::tuple + RangeParams; // ------------------------------ V0 ------------------------------ -class RangeAddSubgraphTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { +class RangeAddSubgraphTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo& obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; // ------------------------------ V4 ------------------------------ -class RangeNumpyAddSubgraphTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { +class RangeNumpyAddSubgraphTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo& obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_shape_of.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_shape_of.hpp index c84f0e2d8292e5..6127de67d74ab1 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_shape_of.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_shape_of.hpp @@ -4,23 +4,26 @@ #pragma once -#include -#include -#include -#include +#include "shared_test_classes/base/ov_subgraph.hpp" -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "shared_test_classes/single_layer/shape_of.hpp" +namespace ov { +namespace test { -#include "ov_models/builders.hpp" +typedef std::tuple + ShapeOfParams; -namespace SubgraphTestsDefinitions { - -class ReluShapeOfSubgraphTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { +class ReluShapeOfSubgraphTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo& obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reshape_squeeze_reshape_relu.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reshape_squeeze_reshape_relu.hpp index eb4811e08cf353..b2c69f96d946c0 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reshape_squeeze_reshape_relu.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reshape_squeeze_reshape_relu.hpp @@ -4,30 +4,32 @@ #pragma once -#include #include +#include #include -#include -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/builders.hpp" -#include "ov_models/utils/ov_helpers.hpp" - -namespace SubgraphTestsDefinitions { -using ShapeAxesTuple = std::pair, std::vector>; - -using ReshapeSqueezeReshapeReluTuple = typename std::tuple< - ShapeAxesTuple, // Input shapes & squeeze_indices - InferenceEngine::Precision, // Network precision - std::string, // Device name - ngraph::helpers::SqueezeOpType // SqueezeOpType ->; - -class ReshapeSqueezeReshapeRelu - : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { + +#include "common_test_utils/test_enums.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +using ShapeAxesTuple = std::pair>; + +using ReshapeSqueezeReshapeReluTuple = typename std::tuple; + +class ReshapeSqueezeReshapeRelu : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo &obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/split_conv_concat.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/split_conv_concat.hpp index a02822a50b9374..d74865a6bb0c6b 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/split_conv_concat.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/split_conv_concat.hpp @@ -4,19 +4,37 @@ #pragma once +#include +#include #include #include -#include -#include #include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/utils/ov_helpers.hpp" -#include "ov_models/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +class SplitConvConcatBase : public ov::test::SubgraphBaseStaticTest { +protected: + void configure_test(const ov::test::BasicParams& param); +}; + +class SplitConvConcat : public testing::WithParamInterface, virtual public SplitConvConcatBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +} // namespace test +} // namespace ov namespace SubgraphTestsDefinitions { class SplitConvConcat : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon { + virtual public ov::test::SplitConvConcatBase { public: static std::string getTestCaseName(const testing::TestParamInfo& obj); diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/variadic_split_pad.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/variadic_split_pad.hpp index 6cfbf94286902d..d9744f8b2ab8cc 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/variadic_split_pad.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/variadic_split_pad.hpp @@ -4,33 +4,35 @@ #pragma once -#include #include +#include #include -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ov_models/builders.hpp" - -namespace SubgraphTestsDefinitions { +#include "shared_test_classes/base/ov_subgraph.hpp" -typedef std::tuple< - InferenceEngine::SizeVector, // Input shapes - size_t, // Axis - std::vector, // Split number - std::vector, // Index connected layer - std::vector, // Pad begin - std::vector, // Pad end - ngraph::helpers::PadMode, // Pad mode - InferenceEngine::Precision, // Network precision - std::string // Device name -> SplitPadTuple; +namespace ov { +namespace test { +typedef std::tuple, // Split number + std::vector, // Index connected layer + std::vector, // Pad begin + std::vector, // Pad end + ov::op::PadMode, // Pad mode + ov::element::Type, // Input element type + std::string // Device name + > + SplitPadTuple; -class VariadicSplitPad: public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon{ +class VariadicSplitPad : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { public: - static std::string getTestCaseName(const testing::TestParamInfo &obj); + static std::string getTestCaseName(const testing::TestParamInfo& obj); + protected: void SetUp() override; }; -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp index 139678a602fd21..0d8909c46581e0 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp @@ -9,7 +9,7 @@ #include "common_test_utils/ov_tensor_utils.hpp" -#include "shared_test_classes/single_layer/roi_align.hpp" +#include "shared_test_classes/single_op/roi_align.hpp" #include "shared_test_classes/single_layer/psroi_pooling.hpp" #include "shared_test_classes/base/utils/generate_inputs.hpp" #include "shared_test_classes/base/utils/ranges.hpp" @@ -537,13 +537,13 @@ ov::runtime::Tensor generate(const std::shared_ptr& node, if (node->get_sampling_ratio() != 0) { const auto &inputShape = node->get_input_shape(0); std::vector blobData(node->get_shape()[0] * 4); - LayerTestsDefinitions::ROIAlignLayerTest::fillCoordTensor(blobData, - inputShape[2], - inputShape[3], - node->get_spatial_scale(), - node->get_sampling_ratio(), - node->get_pooled_h(), - node->get_pooled_w()); + ov::test::ROIAlignLayerTest::fillCoordTensor(blobData, + inputShape[2], + inputShape[3], + node->get_spatial_scale(), + node->get_sampling_ratio(), + node->get_pooled_h(), + node->get_pooled_w()); return ov::test::utils::create_tensor(ov::element::f32, targetShape, blobData); } else { return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); @@ -551,7 +551,7 @@ ov::runtime::Tensor generate(const std::shared_ptr& node, } case 2: { std::vector roiIdxVector(node->get_shape()[0]); - LayerTestsDefinitions::ROIAlignLayerTest::fillIdxTensor(roiIdxVector, node->get_shape()[0]); + ov::test::ROIAlignLayerTest::fillIdxTensor(roiIdxVector, node->get_shape()[0]); return ov::test::utils::create_tensor(elemType, targetShape, roiIdxVector); } default: diff --git a/src/tests/functional/shared_test_classes/src/single_op/einsum.cpp b/src/tests/functional/shared_test_classes/src/single_op/einsum.cpp new file mode 100644 index 00000000000000..4912bf6a0afd04 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/einsum.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/einsum.hpp" + +namespace ov { +namespace test { + +std::string EinsumLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + EinsumEquationWithInput equation_with_input; + ov::element::Type model_type; + std::string targetDevice; + std::tie(model_type, equation_with_input, targetDevice) = obj.param; + std::string equation; + std::vector shapes; + std::tie(equation, shapes) = equation_with_input; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < shapes.size(); i++) { + result << ov::test::utils::partialShape2str({shapes[i].first}) << (i < shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < shapes.size(); j++) { + result << ov::test::utils::vec2str(shapes[j].second[i]) << (j < shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "PRC=" << model_type.get_type_name() << "_"; + result << "Eq=" << equation << "_"; + result << "trgDev=" << targetDevice; + return result.str(); +} + +void EinsumLayerTest::SetUp() { + EinsumEquationWithInput equation_with_input; + ov::element::Type model_type; + std::tie(model_type, equation_with_input, targetDevice) = this->GetParam(); + std::string equation; + std::vector shapes; + std::tie(equation, shapes) = equation_with_input; + init_input_shapes(shapes); + + ov::ParameterVector params; + ov::OutputVector param_outs; + for (const auto& shape : inputDynamicShapes) { + auto param = std::make_shared(model_type, shape); + params.push_back(param); + param_outs.push_back(param); + } + + auto einsum = std::make_shared(param_outs, equation); + + auto result = std::make_shared(einsum); + function = std::make_shared(result, params, "einsum"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/eye.cpp b/src/tests/functional/shared_test_classes/src/single_op/eye.cpp new file mode 100644 index 00000000000000..0aae5e1593ef43 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/eye.cpp @@ -0,0 +1,74 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "shared_test_classes/single_op/eye.hpp" + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "openvino/pass/constant_folding.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/eye.hpp" + +namespace ov { +namespace test { +std::string EyeLayerTest::getTestCaseName(testing::TestParamInfo obj) { + std::string td; + std::vector input_shapes; + ov::element::Type model_type; + std::vector out_batch_shape; + std::vector eye_par; + std::tie(input_shapes, out_batch_shape, eye_par, model_type, td) = obj.param; + std::ostringstream result; + result << "EyeTest_"; + result << "IS=("; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape}) << "_"; + } + result << ")"; + result << "rowNum=" << eye_par[0] << "_"; + result << "colNum=" << eye_par[1] << "_"; + result << "diagShift=" << eye_par[2] << "_"; + result << "batchShape=" << ov::test::utils::vec2str(out_batch_shape) << "_"; + result << model_type << "_"; + result << std::to_string(obj.index); + return result.str(); +} + +void EyeLayerTest::SetUp() { + std::vector input_shapes; + int row_num, col_num; + int shift; + std::vector out_batch_shape; + ov::element::Type model_type; + std::vector eye_par; + std::tie(input_shapes, out_batch_shape, eye_par, model_type, targetDevice) = this->GetParam(); + row_num = eye_par[0]; + col_num = eye_par[1]; + shift = eye_par[2]; + + std::shared_ptr eye_operation; + + auto rows_const = std::make_shared(ov::element::i32, input_shapes[0], &row_num); + rows_const->set_friendly_name("rows"); + auto cols_const = std::make_shared(ov::element::i32, input_shapes[1], &col_num); + cols_const->set_friendly_name("cols"); + auto diag_const = std::make_shared(ov::element::i32, input_shapes[2], &shift); + diag_const->set_friendly_name("diagInd"); + + if (!out_batch_shape.empty() && out_batch_shape[0] != 0) { + auto batch_shape_par = std::make_shared(ov::element::i32, + ov::Shape{out_batch_shape.size()}, + out_batch_shape.data()); + batch_shape_par->set_friendly_name("batchShape"); + eye_operation = std::make_shared(rows_const, cols_const, diag_const, batch_shape_par, model_type); + } else { + eye_operation = std::make_shared(rows_const, cols_const, diag_const, model_type); + } + + // Without this call the eye operation will be calculated by CPU and substituted by Constant operator + ov::pass::disable_constant_folding(eye_operation); + auto result = std::make_shared(eye_operation); + function = std::make_shared(result, ov::ParameterVector(), "eye"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/pooling.cpp b/src/tests/functional/shared_test_classes/src/single_op/pooling.cpp new file mode 100644 index 00000000000000..c86f8a2217fee5 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/pooling.cpp @@ -0,0 +1,170 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/pooling.hpp" + +#include "ov_models/builders.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/avg_pool.hpp" +#include "openvino/op/max_pool.hpp" + +namespace ov { +namespace test { +using ov::test::utils::PoolingTypes; + +std::string PoolingLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + poolSpecificParams pool_params; + ov::element::Type model_type; + std::vector shapes; + std::string targetDevice; + std::tie(pool_params, model_type, shapes, targetDevice) = obj.param; + PoolingTypes pool_type; + std::vector kernel, stride; + std::vector pad_begin, pad_end; + ov::op::PadType pad_type; + ov::op::RoundingType rounding_type; + bool excludePad; + std::tie(pool_type, kernel, stride, pad_begin, pad_end, rounding_type, pad_type, excludePad) = pool_params; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < shapes.size(); i++) { + result << ov::test::utils::partialShape2str({shapes[i].first}) << (i < shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < shapes.size(); j++) { + result << ov::test::utils::vec2str(shapes[j].second[i]) << (j < shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + switch (pool_type) { + case PoolingTypes::MAX: + result << "MaxPool_"; + break; + case PoolingTypes::AVG: + result << "AvgPool_"; + result << "ExcludePad=" << excludePad << "_"; + break; + } + result << "K" << ov::test::utils::vec2str(kernel) << "_"; + result << "S" << ov::test::utils::vec2str(stride) << "_"; + result << "PB" << ov::test::utils::vec2str(pad_begin) << "_"; + result << "PE" << ov::test::utils::vec2str(pad_end) << "_"; + result << "Rounding=" << rounding_type << "_"; + result << "AutoPad=" << pad_type << "_"; + result << "modelType=" << model_type.get_type_name() << "_"; + result << "trgDev=" << targetDevice; + return result.str(); +} + +void PoolingLayerTest::SetUp() { + poolSpecificParams pool_params; + std::vector shapes; + ov::element::Type model_type; + std::tie(pool_params, model_type, shapes, targetDevice) = this->GetParam(); + PoolingTypes pool_type; + std::vector kernel, stride; + std::vector pad_begin, pad_end; + ov::op::PadType pad_type; + ov::op::RoundingType rounding_type; + bool excludePad; + std::tie(pool_type, kernel, stride, pad_begin, pad_end, rounding_type, pad_type, excludePad) = pool_params; + init_input_shapes(shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes.front()); + + std::shared_ptr pooling = ngraph::builder::makePooling(param, + stride, + pad_begin, + pad_end, + kernel, + rounding_type, + pad_type, + excludePad, + pool_type); + + auto result = std::make_shared(pooling); + function = std::make_shared(result, ov::ParameterVector{param}, "pooling"); +} + + +std::string MaxPoolingV8LayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + maxPoolV8SpecificParams pool_params; + ov::element::Type model_type; + std::vector shapes; + std::string target_device; + std::tie(pool_params, model_type, shapes, target_device) = obj.param; + std::vector kernel, stride, dilation; + std::vector pad_begin, pad_end; + ov::op::PadType pad_type; + ov::op::RoundingType rounding_type; + ov::element::Type index_element_type; + int64_t axis; + std::tie(kernel, stride, dilation, pad_begin, pad_end, index_element_type, axis, rounding_type, pad_type) = pool_params; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < shapes.size(); i++) { + result << ov::test::utils::partialShape2str({shapes[i].first}) << (i < shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < shapes.size(); j++) { + result << ov::test::utils::vec2str(shapes[j].second[i]) << (j < shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "K" << ov::test::utils::vec2str(kernel) << "_"; + result << "S" << ov::test::utils::vec2str(stride) << "_"; + result << "D" << ov::test::utils::vec2str(dilation) << "_"; + result << "PB" << ov::test::utils::vec2str(pad_begin) << "_"; + result << "PE" << ov::test::utils::vec2str(pad_end) << "_"; + result << "IET" << index_element_type << "_"; + result << "A" << axis << "_"; + result << "Rounding=" << rounding_type << "_"; + result << "AutoPad=" << pad_type << "_"; + result << "modelType=" << model_type.get_type_name() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void MaxPoolingV8LayerTest::SetUp() { + maxPoolV8SpecificParams pool_params; + ov::element::Type model_type; + std::vector shapes; + std::tie(pool_params, model_type, shapes, targetDevice) = this->GetParam(); + std::vector kernel, stride, dilation; + std::vector pad_begin, pad_end; + ov::op::PadType pad_type; + ov::op::RoundingType rounding_type; + ov::element::Type index_element_type; + int64_t axis; + std::tie(kernel, stride, dilation, pad_begin, pad_end, index_element_type, axis, rounding_type, pad_type) = pool_params; + init_input_shapes(shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes.front()); + + std::shared_ptr max_pool = ngraph::builder::makeMaxPoolingV8(param, stride, dilation, pad_begin, pad_end, + kernel, rounding_type, pad_type, + index_element_type, axis); + + const auto max_pool_v8_second_output_is_supported = targetDevice == ov::test::utils::DEVICE_GPU; + + ov::ResultVector results; + if (max_pool_v8_second_output_is_supported) { + results = {std::make_shared(max_pool->output(0)), + std::make_shared(max_pool->output(1))}; + } else { + results = { std::make_shared(max_pool->output(0)) }; + } + function = std::make_shared(max_pool->outputs(), ov::ParameterVector{param}, "MaxPoolV8"); +} + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/roi_align.cpp b/src/tests/functional/shared_test_classes/src/single_op/roi_align.cpp new file mode 100644 index 00000000000000..d191e7dda9565f --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/roi_align.cpp @@ -0,0 +1,203 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_op/roi_align.hpp" + +#include "openvino/core/enum_names.hpp" + +namespace ov { +namespace test { +std::string ROIAlignLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shapes; + ov::Shape coords_shape; + int pooled_h; + int pooled_w; + float spatial_scale; + int pooling_ratio; + std::string pooling_mode; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shapes, coords_shape, pooled_h, pooled_w, spatial_scale, + pooling_ratio, pooling_mode, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < input_shapes.size(); i++) { + result << ov::test::utils::partialShape2str({input_shapes[i].first}) + << (i < input_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < input_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < input_shapes.size(); j++) { + result << ov::test::utils::vec2str(input_shapes[j].second[i]) << (j < input_shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "coordShape=" << ov::test::utils::vec2str(coords_shape) << "_"; + result << "pooledH=" << pooled_h << "_"; + result << "pooledW=" << pooled_w << "_"; + result << "spatialScale=" << spatial_scale << "_"; + result << "poolingRatio=" << pooling_ratio << "_"; + result << "poolingMode=" << pooling_mode << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +static int randInt(int low, int high) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(low, high); + return dis(gen); +} + +void ROIAlignLayerTest::fillCoordTensor(std::vector& coords, int height, int width, + float spatial_scale, int pooled_ratio, int pooled_h, int pooled_w) { + int min_roi_width = pooled_w; + int max_roi_width = width / pooled_ratio; + int min_roi_height = pooled_h; + int max_roi_height = height / pooled_ratio; + + for (int i = 0; i < coords.size() / 4; i++) { + int size_x = std::min(width, randInt(min_roi_width, max_roi_width)); + int size_y = std::min(height, randInt(min_roi_height, max_roi_height)); + int start_x = randInt(0, std::max(1, width - size_x - 1)); + int start_y = randInt(0, std::max(1, height - size_y - 1)); + + coords[i * 4] = start_x / spatial_scale; + coords[i * 4 + 1] = start_y / spatial_scale; + coords[i * 4 + 2] = (start_x + size_x - 1) / spatial_scale; + coords[i * 4 + 3] = (start_y + size_y - 1) / spatial_scale; + } +} +void ROIAlignLayerTest::fillIdxTensor(std::vector& idx, int batch_size) { + int batch_id = 0; + for (int i = 0; i < idx.size(); i++) { + idx[i] = batch_id; + batch_id = (batch_id + 1) % batch_size; + } +} + +void ROIAlignLayerTest::SetUp() { + std::vector input_shapes; + ov::Shape coords_shape; + int pooled_h; + int pooled_w; + float spatial_scale; + int pooling_ratio; + std::string pooling_mode; + ov::element::Type model_type; + std::tie(input_shapes, coords_shape, pooled_h, pooled_w, spatial_scale, + pooling_ratio, pooling_mode, model_type, targetDevice) = this->GetParam(); + + init_input_shapes(input_shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes[0]); + std::vector proposal_vector; + std::vector roi_idx_vector; + proposal_vector.resize(coords_shape[0] * 4); + roi_idx_vector.resize(coords_shape[0]); + + fillCoordTensor(proposal_vector, inputDynamicShapes[0][2].get_length(), inputDynamicShapes[0][3].get_length(), + spatial_scale, pooling_ratio, pooled_h, pooled_w); + fillIdxTensor(roi_idx_vector, inputDynamicShapes[0][0].get_length()); + auto idx_shape = ov::Shape{coords_shape[0]}; + + auto coords = std::make_shared(model_type, coords_shape, proposal_vector.data()); + auto rois_Idx = std::make_shared(ov::element::i32, idx_shape, roi_idx_vector.data()); + auto roi_align = std::make_shared(param, + coords, + rois_Idx, + pooled_h, + pooled_w, + pooling_ratio, + spatial_scale, + pooling_mode); + function = std::make_shared(roi_align->outputs(), ov::ParameterVector{param}, "roi_align"); +} + +std::string ROIAlignV9LayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shapes; + ov::Shape coords_shape; + int pooled_h; + int pooled_w; + float spatial_scale; + int pooling_ratio; + std::string pooling_mode; + std::string roi_aligned_mode; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shapes, coords_shape, pooled_h, pooled_w, spatial_scale, + pooling_ratio, pooling_mode, roi_aligned_mode, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < input_shapes.size(); i++) { + result << ov::test::utils::partialShape2str({input_shapes[i].first}) + << (i < input_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < input_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < input_shapes.size(); j++) { + result << ov::test::utils::vec2str(input_shapes[j].second[i]) << (j < input_shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "coordShape=" << ov::test::utils::vec2str(coords_shape) << "_"; + result << "pooledH=" << pooled_h << "_"; + result << "pooledW=" << pooled_w << "_"; + result << "spatialScale=" << spatial_scale << "_"; + result << "poolingRatio=" << pooling_ratio << "_"; + result << "poolingMode=" << pooling_mode << "_"; + result << "ROIMode=" << roi_aligned_mode << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ROIAlignV9LayerTest::SetUp() { + std::vector input_shapes; + ov::Shape coords_shape; + int pooled_h; + int pooled_w; + float spatial_scale; + int pooling_ratio; + std::string pooling_mode; + std::string roi_aligned_mode; + ov::element::Type model_type; + std::tie(input_shapes, coords_shape, pooled_h, pooled_w, spatial_scale, + pooling_ratio, pooling_mode, roi_aligned_mode, model_type, targetDevice) = this->GetParam(); + + init_input_shapes(input_shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes[0]); + std::vector proposal_vector; + std::vector roi_idx_vector; + proposal_vector.resize(coords_shape[0] * 4); + roi_idx_vector.resize(coords_shape[0]); + + ROIAlignLayerTest::fillCoordTensor(proposal_vector, inputDynamicShapes[0][2].get_length(), inputDynamicShapes[0][3].get_length(), + spatial_scale, pooling_ratio, pooled_h, pooled_w); + ROIAlignLayerTest::fillIdxTensor(roi_idx_vector, inputDynamicShapes[0][0].get_length()); + auto idx_shape = ov::Shape{coords_shape[0]}; + + auto coords = std::make_shared(model_type, coords_shape, proposal_vector.data()); + auto rois_Idx = std::make_shared(ov::element::i32, idx_shape, roi_idx_vector.data()); + auto roi_align = std::make_shared(param, + coords, + rois_Idx, + pooled_h, + pooled_w, + pooling_ratio, + spatial_scale, + ov::EnumNames::as_enum(pooling_mode), + ov::EnumNames::as_enum(roi_aligned_mode)); + function = std::make_shared(roi_align->outputs(), ov::ParameterVector{param}, "roi_align"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/roi_pooling.cpp b/src/tests/functional/shared_test_classes/src/single_op/roi_pooling.cpp new file mode 100644 index 00000000000000..f4867abe6629b7 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/roi_pooling.cpp @@ -0,0 +1,74 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/roi_pooling.hpp" + +namespace ov { +namespace test { +std::string ROIPoolingLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shapes; + ov::Shape pool_shape; + float spatial_scale; + ov::test::utils::ROIPoolingTypes pool_method; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shapes, pool_shape, spatial_scale, pool_method, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < input_shapes.size(); i++) { + result << ov::test::utils::partialShape2str({input_shapes[i].first}) + << (i < input_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < input_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < input_shapes.size(); j++) { + result << ov::test::utils::vec2str(input_shapes[j].second[i]) << (j < input_shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "PS=" << ov::test::utils::vec2str(pool_shape) << "_"; + result << "Scale=" << spatial_scale << "_"; + switch (pool_method) { + case utils::ROIPoolingTypes::ROI_MAX: + result << "Max_"; + break; + case utils::ROIPoolingTypes::ROI_BILINEAR: + result << "Bilinear_"; + break; + } + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ROIPoolingLayerTest::SetUp() { + std::vector input_shapes; + ov::Shape pool_shape; + float spatial_scale; + ov::test::utils::ROIPoolingTypes pool_method; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shapes, pool_shape, spatial_scale, pool_method, model_type, targetDevice) = this->GetParam(); + + abs_threshold = 0.08f; + + init_input_shapes(input_shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes[0]); + auto coord_param = std::make_shared(model_type, inputDynamicShapes[1]); + std::string pool_method_str; + if (pool_method == ov::test::utils::ROIPoolingTypes::ROI_MAX) { + pool_method_str = "max"; + } else if (pool_method == ov::test::utils::ROIPoolingTypes::ROI_BILINEAR) { + pool_method_str = "bilinear"; + } else { + FAIL() << "Incorrect type of ROIPooling operation"; + } + auto roi_pooling = std::make_shared(param, coord_param, pool_shape, spatial_scale, pool_method_str); + function = std::make_shared(roi_pooling->outputs(), ov::ParameterVector{param, coord_param}, "roi_pooling"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp b/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp index d16090a2e1c819..411cff4a46ab21 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp @@ -2,17 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/common_optimizations/mul_conv_fusion.hpp" -#include "ngraph/pass/constant_folding.hpp" #include "shared_test_classes/subgraph/mul_conv_fusion.hpp" + +#include "common_test_utils/graph_comparator.hpp" +#include "openvino/pass/manager.hpp" #include "ov_models/builders.hpp" +#include "transformations/common_optimizations/mul_conv_fusion.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { -std::string MulConvFusion::getTestCaseName(const testing::TestParamInfo &obj) { - ngraph::NodeTypeInfo conv_type; - ngraph::Shape input_shape, weights_shape, const_shape; - ngraph::element::Type precision; +std::string MulConvFusion::getTestCaseName(const testing::TestParamInfo& obj) { + ov::NodeTypeInfo conv_type; + ov::Shape input_shape, weights_shape, const_shape; + ov::element::Type precision; std::string device; std::tie(conv_type, input_shape, weights_shape, const_shape, precision, std::ignore, device) = obj.param; std::ostringstream results; @@ -27,36 +30,43 @@ std::string MulConvFusion::getTestCaseName(const testing::TestParamInfoGetParam(); - auto param = std::make_shared(precision, input_shape); + std::tie(conv_type, input_shape, weights_shape, const_shape, precision, is_negative, targetDevice) = + this->GetParam(); + auto param = std::make_shared(precision, input_shape); auto spatial_dims = input_shape.size() - 2; auto mul_const = ngraph::builder::makeConstant(precision, const_shape, {}, true); - auto mul = std::make_shared(param, mul_const); - ngraph::Shape strides(spatial_dims, 1); + auto mul = std::make_shared(param, mul_const); + ov::Shape strides(spatial_dims, 1); std::vector pad_begin(spatial_dims, 0), pad_end(spatial_dims, 0); auto weights = ngraph::builder::makeConstant(precision, weights_shape, {}, true); - std::shared_ptr conv; - if (conv_type == ngraph::opset8::Convolution::get_type_info_static()) { - conv = std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); - } else if (conv_type == ngraph::opset8::GroupConvolution::get_type_info_static()) { - conv = std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); - } else if (conv_type == ngraph::opset8::ConvolutionBackpropData::get_type_info_static()) { - conv = std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); - } else if (conv_type == ngraph::opset8::GroupConvolutionBackpropData::get_type_info_static()) { - conv = std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); + std::shared_ptr conv; + if (conv_type == ov::op::v1::Convolution::get_type_info_static()) { + conv = std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); + } else if (conv_type == ov::op::v1::GroupConvolution::get_type_info_static()) { + conv = std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); + } else if (conv_type == ov::op::v1::ConvolutionBackpropData::get_type_info_static()) { + conv = + std::make_shared(mul, weights, strides, pad_begin, pad_end, strides); + } else if (conv_type == ov::op::v1::GroupConvolutionBackpropData::get_type_info_static()) { + conv = std::make_shared(mul, + weights, + strides, + pad_begin, + pad_end, + strides); } else { OPENVINO_THROW("Unsupported type"); } - function = std::make_shared(ngraph::OutputVector{conv}, ngraph::ParameterVector{param}); - auto cloned_function = ngraph::clone_function(*function); + function = std::make_shared(ov::OutputVector{conv}, ov::ParameterVector{param}); + auto cloned_function = function->clone(); - ngraph::pass::Manager manager; + ov::pass::Manager manager; manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -65,58 +75,75 @@ void MulConvFusion::SetUp() { bool functions_equal = false; if (!is_negative) { - auto param = std::make_shared(precision, input_shape); - ngraph::Shape strides(spatial_dims, 1); + auto param = std::make_shared(precision, input_shape); + ov::Shape strides(spatial_dims, 1); std::vector pad_begin(spatial_dims, 0), pad_end(spatial_dims, 0); - std::shared_ptr conv; - if (conv_type == ngraph::opset8::Convolution::get_type_info_static()) { - weights = std::make_shared(weights, mul_const); + std::shared_ptr conv; + if (conv_type == ov::op::v1::Convolution::get_type_info_static()) { + weights = std::make_shared(weights, mul_const); weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); - conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); - } else if (conv_type == ngraph::opset8::GroupConvolution::get_type_info_static()) { + conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); + } else if (conv_type == ov::op::v1::GroupConvolution::get_type_info_static()) { const_shape.insert(const_shape.begin(), weights_shape.size() - const_shape.size(), 1); auto G = const_shape[2] > 1 ? weights_shape[0] : 1; const_shape[0] = G; const_shape[2] /= G; - auto reshape = std::make_shared(mul_const, - ngraph::op::Constant::create(ngraph::element::u64, ngraph::Shape{const_shape.size()}, const_shape), false); - weights = std::make_shared(weights, reshape); + auto reshape = std::make_shared( + mul_const, + ov::op::v0::Constant::create(ov::element::u64, ov::Shape{const_shape.size()}, const_shape), + false); + weights = std::make_shared(weights, reshape); weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); - conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); - } else if (conv_type == ngraph::opset8::ConvolutionBackpropData::get_type_info_static()) { + conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); + } else if (conv_type == ov::op::v1::ConvolutionBackpropData::get_type_info_static()) { const_shape.insert(const_shape.begin(), weights_shape.size() - const_shape.size(), 1); const_shape[0] = const_shape[1]; const_shape[1] = 1; - auto reshape = std::make_shared(mul_const, - ngraph::op::Constant::create(ngraph::element::u64, ngraph::Shape{const_shape.size()}, const_shape), false); - weights = std::make_shared(weights, reshape); + auto reshape = std::make_shared( + mul_const, + ov::op::v0::Constant::create(ov::element::u64, ov::Shape{const_shape.size()}, const_shape), + false); + weights = std::make_shared(weights, reshape); weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); - conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); - } else if (conv_type == ngraph::opset8::GroupConvolutionBackpropData::get_type_info_static()) { + conv = std::make_shared(param, + weights, + strides, + pad_begin, + pad_end, + strides); + } else if (conv_type == ov::op::v1::GroupConvolutionBackpropData::get_type_info_static()) { const_shape.insert(const_shape.begin(), weights_shape.size() - const_shape.size(), 1); auto G = const_shape[2] > 1 ? weights_shape[0] : 1; const_shape[0] = G; const_shape[1] = const_shape[2] / G; const_shape[2] = 1; - auto reshape = std::make_shared(mul_const, - ngraph::op::Constant::create(ngraph::element::u64, ngraph::Shape{const_shape.size()}, const_shape), false); - weights = std::make_shared(weights, reshape); + auto reshape = std::make_shared( + mul_const, + ov::op::v0::Constant::create(ov::element::u64, ov::Shape{const_shape.size()}, const_shape), + false); + weights = std::make_shared(weights, reshape); weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); - conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); + conv = std::make_shared(param, + weights, + strides, + pad_begin, + pad_end, + strides); } else { OPENVINO_THROW("Unsupported type"); } - auto reference_function = std::make_shared(ngraph::OutputVector{conv}, ngraph::ParameterVector{param}); + auto reference_function = std::make_shared(ov::OutputVector{conv}, ov::ParameterVector{param}); std::tie(functions_equal, std::ignore) = compare_functions(cloned_function, reference_function, true); ASSERT_TRUE(functions_equal); } else { - auto reference_function = ngraph::clone_function(*function); + auto reference_function = function->clone(); std::tie(functions_equal, std::ignore) = compare_functions(cloned_function, reference_function, true); ASSERT_TRUE(functions_equal); } } -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp b/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp index 8b070a70026d15..dfc1dcdb5f7fd5 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp @@ -4,37 +4,43 @@ #include "shared_test_classes/subgraph/multiply_add.hpp" -namespace SubgraphTestsDefinitions { -std::string MultiplyAddLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { - std::vector inputShapes; - InferenceEngine::Precision netPrecision; +#include "ov_models/builders.hpp" +#include "ov_models/utils/ov_helpers.hpp" + +namespace ov { +namespace test { + +std::string MultiplyAddLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::Shape inputShapes; + ov::element::Type element_type; std::string targetName; - std::tie(inputShapes, netPrecision, targetName) = obj.param; + std::tie(inputShapes, element_type, targetName) = obj.param; std::ostringstream results; results << "IS=" << ov::test::utils::vec2str(inputShapes) << "_"; - results << "netPRC=" << netPrecision.name() << "_"; + results << "ET=" << element_type << "_"; results << "targetDevice=" << targetName << "_"; return results.str(); } void MultiplyAddLayerTest::SetUp() { - std::vector inputShape; - auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(inputShape, netPrecision, targetDevice) = this->GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); + ov::Shape inputShape; + ov::element::Type element_type; + std::tie(inputShape, element_type, targetDevice) = this->GetParam(); + ov::ParameterVector params{std::make_shared(element_type, ov::PartialShape(inputShape))}; + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector constShape(inputShape.size(), 1); constShape[1] = inputShape[1]; - auto const_mul = ngraph::builder::makeConstant(ngPrc, constShape, {}, true); - auto mul = std::make_shared(paramOuts[0], const_mul); - auto const_add = ngraph::builder::makeConstant(ngPrc, constShape, {}, true); - auto add = std::make_shared(mul, const_add); - ngraph::ResultVector results{std::make_shared(add)}; - function = std::make_shared(results, params, "multiplyAdd"); + auto const_mul = ngraph::builder::makeConstant(element_type, constShape, {}, true); + auto mul = std::make_shared(paramOuts[0], const_mul); + auto const_add = ngraph::builder::makeConstant(element_type, constShape, {}, true); + auto add = std::make_shared(mul, const_add); + ov::ResultVector results{std::make_shared(add)}; + function = std::make_shared(results, params, "multiplyAdd"); } -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp b/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp index 2371182175c711..9ff6272b9ab529 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp @@ -4,12 +4,16 @@ #include "shared_test_classes/subgraph/mvn_multiply_add.hpp" -namespace SubgraphTestsDefinitions { +#include "ov_models/builders.hpp" +#include "ov_models/utils/ov_helpers.hpp" -std::string MVNMultiplyAdd::getTestCaseName(const testing::TestParamInfo &obj) { - std::pair shapes; - InferenceEngine::SizeVector inputShapes, constantShapes; - InferenceEngine::Precision dataPrecision, axesPrecision; +namespace ov { +namespace test { + +std::string MVNMultiplyAdd::getTestCaseName(const testing::TestParamInfo& obj) { + std::pair shapes; + ov::Shape inputShapes, constantShapes; + ov::element::Type dataPrecision, axesPrecision; std::vector axes; bool normalizeVariance; float eps; @@ -20,8 +24,8 @@ std::string MVNMultiplyAdd::getTestCaseName(const testing::TestParamInfo shapes; - InferenceEngine::SizeVector inputShapes, constantShapes; - InferenceEngine::Precision dataPrecision, axesPrecision; + std::pair shapes; + ov::Shape inputShapes, constantShapes; + ov::element::Type dataType, axesType; std::vector axes; bool normalizeVariance; float eps; std::string epsMode; - std::tie(shapes, dataPrecision, axesPrecision, axes, normalizeVariance, eps, epsMode, targetDevice) = this->GetParam(); + std::tie(shapes, dataType, axesType, axes, normalizeVariance, eps, epsMode, targetDevice) = this->GetParam(); std::tie(inputShapes, constantShapes) = shapes; - auto dataType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrecision); - auto axesType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(axesPrecision); - - ov::ParameterVector param {std::make_shared(dataType, ov::Shape(inputShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); - auto axesNode = ngraph::builder::makeConstant(axesType, ngraph::Shape{axes.size()}, axes); + ov::ParameterVector param{std::make_shared(dataType, ov::Shape(inputShapes))}; + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); + auto axesNode = ngraph::builder::makeConstant(axesType, ov::Shape{axes.size()}, axes); auto mvn = ngraph::builder::makeMVN6(paramOuts[0], axesNode, normalizeVariance, eps, epsMode); auto gamma = ngraph::builder::makeConstant(dataType, constantShapes, {}, true); - auto mul = std::make_shared(mvn, gamma); + auto mul = std::make_shared(mvn, gamma); auto beta = ngraph::builder::makeConstant(dataType, constantShapes, {}, true); - auto add = std::make_shared(mul, beta); + auto add = std::make_shared(mul, beta); - ngraph::ResultVector results{std::make_shared(add)}; - function = std::make_shared(results, param, "MVNMultiplyAdd"); + ov::ResultVector results{std::make_shared(add)}; + function = std::make_shared(results, param, "MVNMultiplyAdd"); } -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp b/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp index b488166297f618..e51e30a6caaa49 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/perm_conv_perm_concat.cpp @@ -4,105 +4,97 @@ #include "shared_test_classes/subgraph/perm_conv_perm_concat.hpp" -namespace SubgraphTestsDefinitions { +#include "common_test_utils/data_utils.hpp" +#include "functional_test_utils/skip_tests_config.hpp" +#include "ov_models/builders.hpp" + +namespace ov { +namespace test { + std::string PermConvPermConcat::getTestCaseName(const testing::TestParamInfo& obj) { - InferenceEngine::Precision netPrecision; + ov::element::Type element_type; std::string targetName; - std::array input_shape; - std::array kernel_shape; + ov::Shape input_shape; + ov::Shape kernel_shape; size_t output_channels; - std::map configuration; - + ov::AnyMap configuration; - std::tie(netPrecision, targetName, input_shape, kernel_shape, output_channels, configuration) = obj.param; + std::tie(element_type, targetName, input_shape, kernel_shape, output_channels, configuration) = obj.param; std::ostringstream results; results << "IS=" << ov::test::utils::vec2str(std::vector(input_shape.begin(), input_shape.end())) << "_"; results << "KS=" << ov::test::utils::vec2str(std::vector(kernel_shape.begin(), kernel_shape.end())) << "_"; results << "OC=" << output_channels << "_"; - results << "netPRC=" << netPrecision.name() << "_"; + results << "ET=" << element_type << "_"; results << "targetDevice=" << targetName; for (auto const& configItem : configuration) { - results << "_configItem=" << configItem.first << "_" << configItem.second; + results << "_configItem=" << configItem.first << "_" << configItem.second.as(); } return results.str(); } void PermConvPermConcat::SetUp() { - InferenceEngine::Precision netPrecision; - std::array input_shape; - std::array kernel_shape; + ov::element::Type element_type; + ov::Shape input_shape; + ov::Shape kernel_shape; size_t output_channels; - std::map additional_config; + ov::AnyMap additional_config; - std::tie(netPrecision, targetDevice, input_shape, kernel_shape, output_channels, additional_config) = this->GetParam(); + std::tie(element_type, targetDevice, input_shape, kernel_shape, output_channels, additional_config) = + this->GetParam(); configuration.insert(additional_config.begin(), additional_config.end()); const std::size_t input_dim = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - std::vector input_dims { 1, input_dim }; + std::vector input_dims{1, input_dim}; std::vector reshape_in_dims = std::vector(input_shape.begin(), input_shape.end()); - std::vector permute_in_order = { 0, 3, 1, 2 }; - std::vector permute_out_order = { 0, 2, 3, 1 }; + std::vector permute_in_order = {0, 3, 1, 2}; + std::vector permute_out_order = {0, 2, 3, 1}; - ov::ParameterVector input_parameter {std::make_shared(ngPrc, ov::Shape(input_dims))}; + ov::ParameterVector input_parameter{std::make_shared(element_type, ov::Shape(input_dims))}; - auto reshape_in_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, - reshape_in_dims); - auto reshape_in = std::make_shared(input_parameter[0], reshape_in_pattern, false); + auto reshape_in_pattern = std::make_shared(ov::element::i64, ov::Shape{4}, reshape_in_dims); + auto reshape_in = std::make_shared(input_parameter[0], reshape_in_pattern, false); - auto permute_in_params = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, - ngraph::Shape{permute_in_order}); - auto permute_in = std::make_shared(reshape_in, permute_in_params); + auto permute_in_params = + std::make_shared(ov::element::i64, ov::Shape{4}, ov::Shape{permute_in_order}); + auto permute_in = std::make_shared(reshape_in, permute_in_params); auto conv_in_shape = permute_in->get_output_shape(0); auto conv_weights_size = output_channels * (conv_in_shape[1]) * kernel_shape[0] * kernel_shape[1]; - auto conv = ngraph::builder::makeConvolution(permute_in, ngPrc, {kernel_shape[0], kernel_shape[1]}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, - ngraph::op::PadType::VALID, output_channels, false, ov::test::utils::generate_float_numbers(conv_weights_size, -0.5f, 0.5f)); - - auto permute_out_params = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, - permute_out_order); - auto permute_out = std::make_shared(conv, permute_out_params); + auto conv = + ngraph::builder::makeConvolution(permute_in, + element_type, + {kernel_shape[0], kernel_shape[1]}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::VALID, + output_channels, + false, + ov::test::utils::generate_float_numbers(conv_weights_size, -0.5f, 0.5f)); + + auto permute_out_params = std::make_shared(ov::element::i64, ov::Shape{4}, permute_out_order); + auto permute_out = std::make_shared(conv, permute_out_params); auto permute_out_shape = permute_out->get_output_shape(0); - auto concat_const = ngraph::builder::makeConstant(ngPrc, {1, 1, 1, permute_out_shape[3]}, - ov::test::utils::generate_float_numbers(permute_out_shape[3], -10, 10)); + auto concat_const = + ngraph::builder::makeConstant(element_type, + {1, 1, 1, permute_out_shape[3]}, + ov::test::utils::generate_float_numbers(permute_out_shape[3], -10, 10)); auto concat = ngraph::builder::makeConcat({permute_out, concat_const}, 2); - auto reshape_out_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{2}, + auto reshape_out_pattern = std::make_shared( + ov::element::i64, + ov::Shape{2}, InferenceEngine::SizeVector({1, (permute_out_shape[2] + 1) * permute_out_shape[3]})); - auto reshape_out = std::make_shared(concat, reshape_out_pattern, false); + auto reshape_out = std::make_shared(concat, reshape_out_pattern, false); - function = std::make_shared(reshape_out, input_parameter, "perm_conv_perm_concat"); + function = std::make_shared(reshape_out, input_parameter, "perm_conv_perm_concat"); } -void PermConvPermConcat::Run() { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - - LoadNetwork(); - - inferRequest = executableNetwork.CreateInferRequest(); - inputs.clear(); - - for (const auto &input : cnnNetwork.getInputsInfo()) { - const auto &info = input.second; - auto tensorDesc = info->getTensorDesc(); - - auto blob = FuncTestUtils::createAndFillBlobFloat(tensorDesc, 2, -1, 100, 111); - - FuncTestUtils::fillInputsBySinValues(blob); - inferRequest.SetBlob(info->name(), blob); - inputs.push_back(blob); - } - inferRequest.Infer(); - - Validate(); -} -} // namespace SubgraphTestsDefinitions +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/range_add.cpp b/src/tests/functional/shared_test_classes/src/subgraph/range_add.cpp index 86cfab9864b6cf..129b19667baca8 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/range_add.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/range_add.cpp @@ -4,84 +4,79 @@ #include "shared_test_classes/subgraph/range_add.hpp" -namespace SubgraphTestsDefinitions { +#include "ov_models/builders.hpp" + +namespace ov { +namespace test { // ------------------------------ V0 ------------------------------ -std::string RangeAddSubgraphTest::getTestCaseName(const testing::TestParamInfo& obj) { - InferenceEngine::Precision netPrecision; - InferenceEngine::Precision inPrc, outPrc; - InferenceEngine::Layout inLayout, outLayout; +std::string RangeAddSubgraphTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::element::Type input_type; float start, stop, step; std::string targetDevice; - std::tie(start, stop, step, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = obj.param; + std::tie(start, stop, step, input_type, targetDevice) = obj.param; std::ostringstream result; const char separator = '_'; result << "Start=" << start << separator; result << "Stop=" << stop << separator; result << "Step=" << step << separator; - result << "netPRC=" << netPrecision.name() << separator; + result << "ET=" << input_type << separator; result << "targetDevice=" << targetDevice; return result.str(); } void RangeAddSubgraphTest::SetUp() { - InferenceEngine::Precision netPrecision; + ov::element::Type element_type; float start, stop, step; - std::tie(start, stop, step, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - - auto startConstant = std::make_shared(ngPrc, ngraph::Shape{}, start); - auto stopConstant = std::make_shared(ngPrc, ngraph::Shape{}, stop); - auto stepConstant = std::make_shared(ngPrc, ngraph::Shape{}, step); - auto range = std::make_shared(startConstant, stopConstant, stepConstant); - - ov::ParameterVector params{std::make_shared(ngPrc, range->get_shape())}; - auto eltwise = ngraph::builder::makeEltwise(params.front(), range, ngraph::helpers::EltwiseTypes::ADD); - const ngraph::ResultVector results{std::make_shared(eltwise)}; - function = std::make_shared(results, params, "RangeEltwise"); + std::tie(start, stop, step, element_type, targetDevice) = GetParam(); + + auto startConstant = std::make_shared(element_type, ov::Shape{}, start); + auto stopConstant = std::make_shared(element_type, ov::Shape{}, stop); + auto stepConstant = std::make_shared(element_type, ov::Shape{}, step); + auto range = std::make_shared(startConstant, stopConstant, stepConstant); + + ov::ParameterVector params{std::make_shared(element_type, range->get_shape())}; + auto eltwise = ngraph::builder::makeEltwise(params.front(), range, ov::test::utils::EltwiseTypes::ADD); + const ov::ResultVector results{std::make_shared(eltwise)}; + function = std::make_shared(results, params, "RangeEltwise"); } // ------------------------------ V4 ------------------------------ -std::string RangeNumpyAddSubgraphTest::getTestCaseName(const testing::TestParamInfo& obj) { - InferenceEngine::Precision netPrc; - InferenceEngine::Precision constPrc; - InferenceEngine::Precision outPrc; - InferenceEngine::Layout inLayout, outLayout; +std::string RangeNumpyAddSubgraphTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::element::Type element_type; float start, stop, step; std::string targetDevice; - std::tie(start, stop, step, constPrc, netPrc, outPrc, inLayout, outLayout, targetDevice) = obj.param; + std::tie(start, stop, step, element_type, targetDevice) = obj.param; std::ostringstream result; const char separator = '_'; result << "Start=" << start << separator; result << "Stop=" << stop << separator; result << "Step=" << step << separator; - result << "constPRC=" << constPrc.name() << separator; - result << "netPRC=" << netPrc.name() << separator; + result << "ET=" << element_type << separator; result << "targetDevice=" << targetDevice; return result.str(); } void RangeNumpyAddSubgraphTest::SetUp() { - InferenceEngine::Precision netPrc; - InferenceEngine::Precision constPrc; + ov::element::Type element_type; float start, stop, step; - std::tie(start, stop, step, constPrc, netPrc, outPrc, inLayout, outLayout, targetDevice) = GetParam(); - auto ngConstPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(constPrc); - auto ngNetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrc); + std::tie(start, stop, step, element_type, targetDevice) = GetParam(); - auto startConstant = std::make_shared(ngConstPrc, ngraph::Shape{}, start); - auto stopConstant = std::make_shared(ngConstPrc, ngraph::Shape{}, stop); - auto stepConstant = std::make_shared(ngConstPrc, ngraph::Shape{}, step); - auto range = std::make_shared(startConstant, stopConstant, stepConstant, ngNetPrc); + auto startConstant = std::make_shared(element_type, ov::Shape{}, start); + auto stopConstant = std::make_shared(element_type, ov::Shape{}, stop); + auto stepConstant = std::make_shared(element_type, ov::Shape{}, step); + auto range = std::make_shared(startConstant, stopConstant, stepConstant, element_type); - ov::ParameterVector params{std::make_shared(ngNetPrc, range->get_shape())}; + ov::ParameterVector params{std::make_shared(element_type, range->get_shape())}; - auto eltwise = ngraph::builder::makeEltwise(params.front(), range, ngraph::helpers::EltwiseTypes::ADD); - const ngraph::ResultVector results{std::make_shared(eltwise)}; - function = std::make_shared(results, params, "RangeEltwise"); + auto eltwise = ngraph::builder::makeEltwise(params.front(), range, ov::test::utils::EltwiseTypes::ADD); + const ov::ResultVector results{std::make_shared(eltwise)}; + function = std::make_shared(results, params, "RangeEltwise"); } -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/relu_shape_of.cpp b/src/tests/functional/shared_test_classes/src/subgraph/relu_shape_of.cpp index b23f32f4d5b99f..bbb78339d04e6f 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/relu_shape_of.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/relu_shape_of.cpp @@ -4,29 +4,32 @@ #include "shared_test_classes/subgraph/relu_shape_of.hpp" -namespace SubgraphTestsDefinitions { +namespace ov { +namespace test { - std::string ReluShapeOfSubgraphTest::getTestCaseName(const testing::TestParamInfo& obj) { - InferenceEngine::SizeVector inputShapes; - InferenceEngine::Precision inputPrecision; - std::string targetDevice; - std::tie(inputPrecision, inputShapes, targetDevice) = obj.param; - std::ostringstream result; - result << "IS=" << ov::test::utils::vec2str(inputShapes) << "_"; - result << "Precision=" << inputPrecision.name() << "_"; - result << "TargetDevice=" << targetDevice; - return result.str(); - } +std::string ReluShapeOfSubgraphTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::Shape inputShapes; + ov::element::Type element_type, output_type; + std::string targetDevice; + std::tie(element_type, output_type, inputShapes, targetDevice) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(inputShapes) << "_"; + result << "IET=" << element_type << "_"; + result << "OET=" << output_type << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} - void ReluShapeOfSubgraphTest::SetUp() { - InferenceEngine::SizeVector inputShapes; - InferenceEngine::Precision inputPrecision; - std::tie(inputPrecision, inputShapes, targetDevice) = this->GetParam(); - auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); - ov::ParameterVector param {std::make_shared(inType, ov::Shape(inputShapes))}; - auto relu = std::make_shared(param[0]); - auto shapeOf = std::make_shared(relu, inType); - const ngraph::ResultVector results{std::make_shared(shapeOf)}; - function = std::make_shared(results, param, "ReluShapeOf"); - } -} // namespace SubgraphTestsDefinitions +void ReluShapeOfSubgraphTest::SetUp() { + ov::Shape inputShapes; + ov::element::Type element_type, output_type; + std::tie(element_type, output_type, inputShapes, targetDevice) = this->GetParam(); + ov::ParameterVector param{std::make_shared(element_type, ov::Shape(inputShapes))}; + auto relu = std::make_shared(param[0]); + auto shapeOf = std::make_shared(relu, output_type); + const ov::ResultVector results{std::make_shared(shapeOf)}; + function = std::make_shared(results, param, "ReluShapeOf"); +} + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/reshape_squeeze_reshape_relu.cpp b/src/tests/functional/shared_test_classes/src/subgraph/reshape_squeeze_reshape_relu.cpp index 098f1d12a5ed89..1d0c680a55408e 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/reshape_squeeze_reshape_relu.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/reshape_squeeze_reshape_relu.cpp @@ -2,46 +2,50 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "shared_test_classes/subgraph/reshape_squeeze_reshape_relu.hpp" -namespace SubgraphTestsDefinitions { - std::string ReshapeSqueezeReshapeRelu::getTestCaseName(const testing::TestParamInfo &obj) { - ShapeAxesTuple squeezeShape; - InferenceEngine::Precision netPrecision; - std::string targetName; - ngraph::helpers::SqueezeOpType opType; - std::tie(squeezeShape, netPrecision, targetName, opType) = obj.param; - std::ostringstream results; - results << "OpType=" << opType; - results << "IS=" << ov::test::utils::vec2str(squeezeShape.first) << "_"; - results << "indices=" << ov::test::utils::vec2str(squeezeShape.second) << "_"; - results << "netPRC=" << netPrecision.name() << "_"; - results << "targetDevice=" << targetName << "_"; - return results.str(); - } +#include "ov_models/builders.hpp" - void ReshapeSqueezeReshapeRelu::SetUp() { - ShapeAxesTuple squeezeShape; - InferenceEngine::Precision netPrecision; - ngraph::helpers::SqueezeOpType opType; - std::tie(squeezeShape, netPrecision, targetDevice, opType) = this->GetParam(); - const std::size_t input_dim = InferenceEngine::details::product(squeezeShape.first); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - std::vector shape_input{1, input_dim}; - ov::ParameterVector input {std::make_shared(ngPrc, ov::Shape(shape_input))}; - auto reshape1_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{squeezeShape.first.size()}, - squeezeShape.first); - auto reshape1 = std::make_shared(input[0], reshape1_pattern, false); - auto squeeze = ngraph::builder::makeSqueezeUnsqueeze(reshape1, ngraph::element::i64, squeezeShape.second, opType); - auto reshape2_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{2}, - std::vector{1, input_dim}); - auto reshape2 = std::make_shared(squeeze, reshape2_pattern, false); - auto func = std::make_shared(reshape2); - std::string squeezeType; +namespace ov { +namespace test { - function = std::make_shared(func, input, "reshape_squeeze_reshape_relu"); - } -} // namespace SubgraphTestsDefinitions +std::string ReshapeSqueezeReshapeRelu::getTestCaseName( + const testing::TestParamInfo& obj) { + ShapeAxesTuple squeezeShape; + ov::element::Type element_type; + std::string targetName; + ov::test::utils::SqueezeOpType opType; + std::tie(squeezeShape, element_type, targetName, opType) = obj.param; + std::ostringstream results; + results << "OpType=" << opType; + results << "IS=" << ov::test::utils::vec2str(squeezeShape.first) << "_"; + results << "indices=" << ov::test::utils::vec2str(squeezeShape.second) << "_"; + results << "netPRC=" << element_type << "_"; + results << "targetDevice=" << targetName << "_"; + return results.str(); +} + +void ReshapeSqueezeReshapeRelu::SetUp() { + ShapeAxesTuple squeezeShape; + ov::element::Type element_type; + ov::test::utils::SqueezeOpType opType; + std::tie(squeezeShape, element_type, targetDevice, opType) = this->GetParam(); + const size_t input_dim = ov::shape_size(squeezeShape.first); + std::vector shape_input{1, input_dim}; + ov::ParameterVector input{std::make_shared(element_type, ov::Shape(shape_input))}; + auto reshape1_pattern = std::make_shared(ov::element::i64, + ov::Shape{squeezeShape.first.size()}, + squeezeShape.first); + auto reshape1 = std::make_shared(input[0], reshape1_pattern, false); + auto squeeze = ngraph::builder::makeSqueezeUnsqueeze(reshape1, ov::element::i64, squeezeShape.second, opType); + auto reshape2_pattern = + std::make_shared(ov::element::i64, ov::Shape{2}, std::vector{1, input_dim}); + auto reshape2 = std::make_shared(squeeze, reshape2_pattern, false); + auto func = std::make_shared(reshape2); + std::string squeezeType; + + function = std::make_shared(func, input, "reshape_squeeze_reshape_relu"); +} + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/split_conv_concat.cpp b/src/tests/functional/shared_test_classes/src/subgraph/split_conv_concat.cpp index 60ad615b7567a1..7dc009d0022fff 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/split_conv_concat.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/split_conv_concat.cpp @@ -4,30 +4,40 @@ #include "shared_test_classes/subgraph/split_conv_concat.hpp" -namespace SubgraphTestsDefinitions { +#include "common_test_utils/data_utils.hpp" +#include "ie_common.h" +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" -std::string SplitConvConcat::getTestCaseName(const testing::TestParamInfo& obj) { - InferenceEngine::Precision netPrecision; - InferenceEngine::SizeVector inputShapes, newInputShapes; +namespace ov { +namespace test { + +std::string SplitConvConcat::getTestCaseName(const testing::TestParamInfo& obj) { + ov::element::Type element_type; + ov::Shape inputShapes; std::string targetDevice; - std::tie(netPrecision, inputShapes, targetDevice) = obj.param; + std::tie(element_type, inputShapes, targetDevice) = obj.param; std::ostringstream result; result << "IS=" << ov::test::utils::vec2str(inputShapes) << "_"; - result << "netPRC=" << netPrecision.name() << "_"; + result << "ET=" << element_type << "_"; result << "targetDevice=" << targetDevice; return result.str(); } void SplitConvConcat::SetUp() { - std::vector inputShape; - InferenceEngine::Precision netPrecision; - std::tie(netPrecision, inputShape, targetDevice) = this->GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + configure_test(this->GetParam()); +} + +void SplitConvConcatBase::configure_test(const ov::test::BasicParams& param) { + ov::Shape inputShape; + ov::element::Type element_type; + std::tie(element_type, inputShape, targetDevice) = param; - ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; + ov::ParameterVector params{std::make_shared(element_type, ov::Shape(inputShape))}; - auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1); + auto split = ngraph::builder::makeSplit(params[0], element_type, 2, 1); std::vector filterWeights1; std::vector filterWeights2; @@ -35,17 +45,65 @@ void SplitConvConcat::SetUp() { filterWeights1 = ov::test::utils::generate_float_numbers(8 * inputShape[1] / 2 * 3, -0.2f, 0.2f); filterWeights2 = ov::test::utils::generate_float_numbers(8 * inputShape[1] / 2 * 3, -0.2f, 0.2f); } - auto conv1 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {1, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, - ngraph::op::PadType::VALID, 8, false, filterWeights1); - auto relu1 = std::make_shared(conv1); + auto conv1 = ngraph::builder::makeConvolution(split->output(0), + element_type, + {1, 3}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::VALID, + 8, + false, + filterWeights1); + auto relu1 = std::make_shared(conv1); + + auto conv2 = ngraph::builder::makeConvolution(split->output(1), + element_type, + {1, 3}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::VALID, + 8, + false, + filterWeights2); + auto relu2 = std::make_shared(conv2); + auto concat = std::make_shared(ov::OutputVector{relu1->output(0), relu2->output(0)}, 1); + + ov::ResultVector results{std::make_shared(concat)}; + function = std::make_shared(results, params, "SplitConvConcat"); +} + +} // namespace test +} // namespace ov + +namespace SubgraphTestsDefinitions { - auto conv2 = ngraph::builder::makeConvolution(split->output(1), ngPrc, {1, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, - ngraph::op::PadType::VALID, 8, false, filterWeights2); - auto relu2 = std::make_shared(conv2); - auto concat = std::make_shared(ngraph::OutputVector{relu1->output(0), relu2->output(0)}, 1); +std::string SplitConvConcat::getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::Precision precision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + std::tie(precision, inputShapes, targetDevice) = obj.param; + auto element_type = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision); + + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(inputShapes) << "_"; + result << "ET=" << element_type << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void SplitConvConcat::SetUp() { + InferenceEngine::Precision precision; + InferenceEngine::SizeVector inputShapes; + std::tie(precision, inputShapes, targetDevice) = this->GetParam(); + auto element_type = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision); + ov::Shape shape = inputShapes; - ngraph::ResultVector results{std::make_shared(concat)}; - function = std::make_shared(results, params, "SplitConvConcat"); + ov::test::BasicParams param(element_type, shape, targetDevice); + configure_test(param); } } // namespace SubgraphTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/subgraph/variadic_split_pad.cpp b/src/tests/functional/shared_test_classes/src/subgraph/variadic_split_pad.cpp index 13d1c9c542c5cb..8c7906c275e3c1 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/variadic_split_pad.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/variadic_split_pad.cpp @@ -4,48 +4,54 @@ #include "shared_test_classes/subgraph/variadic_split_pad.hpp" -namespace SubgraphTestsDefinitions { +#include "ov_models/builders.hpp" -std::string VariadicSplitPad::getTestCaseName(const testing::TestParamInfo &obj) { - InferenceEngine::SizeVector inputShape; +namespace ov { +namespace test { + +std::string VariadicSplitPad::getTestCaseName(const testing::TestParamInfo& obj) { + ov::Shape input_shape; int64_t axis; std::vector numSplits, connectIndexes; std::vector padsBegin, padsEnd; - ngraph::helpers::PadMode padMode; - InferenceEngine::Precision netPrecision; + ov::op::PadMode padMode; + ov::element::Type element_type; std::string targetName; - std::tie(inputShape, axis, numSplits, connectIndexes, padsBegin, padsEnd, padMode, netPrecision, targetName) = obj.param; + std::tie(input_shape, axis, numSplits, connectIndexes, padsBegin, padsEnd, padMode, element_type, targetName) = + obj.param; std::ostringstream results; - results << "IS=" << ov::test::utils::vec2str(inputShape) << "_"; + results << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; results << "Axis=" << axis << "_"; results << "NumSplits=" << ov::test::utils::vec2str(numSplits) << "_"; results << "ConnectIndexes=" << ov::test::utils::vec2str(connectIndexes) << "_"; results << "padsBegin=" << ov::test::utils::vec2str(padsBegin) << "_"; results << "padsEnd=" << ov::test::utils::vec2str(padsEnd) << "_"; results << "PadMode=" << padMode << "_"; - results << "netPRC=" << netPrecision.name() << "_"; + results << "netPRC=" << element_type << "_"; results << "targetDevice=" << targetName << "_"; return results.str(); } void VariadicSplitPad::SetUp() { - InferenceEngine::SizeVector inputs; + ov::Shape input_shape; int64_t axis; std::vector numSplits, connectIndexes; std::vector padBegin, padEnd; - ngraph::helpers::PadMode padMode; - InferenceEngine::Precision netPrecision; - std::tie(inputs, axis, numSplits, connectIndexes, padBegin, padEnd, padMode, netPrecision, targetDevice) = this->GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - ov::ParameterVector input {std::make_shared(ngPrc, ov::Shape(inputs))}; + ov::op::PadMode padMode; + ov::element::Type element_type; + std::tie(input_shape, axis, numSplits, connectIndexes, padBegin, padEnd, padMode, element_type, targetDevice) = + this->GetParam(); + ov::ParameterVector input{std::make_shared(element_type, ov::Shape(input_shape))}; auto split = ngraph::builder::makeVariadicSplit(input[0], numSplits, axis); - ngraph::ResultVector results; + ov::ResultVector results; for (size_t i : connectIndexes) { auto pad = ngraph::builder::makePad(split->output(i), padBegin, padEnd, 0, padMode); - results.push_back(std::make_shared(pad)); + results.push_back(std::make_shared(pad)); } - function = std::make_shared(results, input, "variadic_split_pad"); + function = std::make_shared(results, input, "variadic_split_pad"); } -} // namespace SubgraphTestsDefinitions + +} // namespace test +} // namespace ov diff --git a/src/tests/ov_helpers/ov_lpt_models/CMakeLists.txt b/src/tests/ov_helpers/ov_lpt_models/CMakeLists.txt index f2b4514c5b0d32..7eda3438659f0d 100644 --- a/src/tests/ov_helpers/ov_lpt_models/CMakeLists.txt +++ b/src/tests/ov_helpers/ov_lpt_models/CMakeLists.txt @@ -12,7 +12,7 @@ ov_add_target( ROOT ${PUBLIC_HEADERS_DIR} INCLUDES PUBLIC - ${PUBLIC_HEADERS_DIR} + "$" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -20,13 +20,14 @@ ov_add_target( ov_models openvino::runtime::dev ADD_CPPLINT - DEPENDENCIES - ov_models - DEVELOPER_PACKAGE - tests ) ov_build_target_faster(${TARGET_NAME} UNITY PCH PRIVATE "src/precomp.hpp" ) + +# install & export + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${PUBLIC_HEADERS_DIR}/") diff --git a/src/tests/ov_helpers/ov_models/CMakeLists.txt b/src/tests/ov_helpers/ov_models/CMakeLists.txt index 6d2989f94af734..69631bd82ba2a0 100644 --- a/src/tests/ov_helpers/ov_models/CMakeLists.txt +++ b/src/tests/ov_helpers/ov_models/CMakeLists.txt @@ -12,22 +12,25 @@ ov_add_target( ROOT ${PUBLIC_HEADERS_DIR} INCLUDES PUBLIC - ${PUBLIC_HEADERS_DIR} + "$" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES PUBLIC openvino::runtime openvino::reference - interpreter_backend + openvino::interpreter_backend openvino::runtime::dev common_test_utils ADD_CLANG_FORMAT - DEVELOPER_PACKAGE - tests ) ov_build_target_faster(${TARGET_NAME} UNITY PCH PRIVATE "src/precomp.hpp" ) + +# install & export + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${PUBLIC_HEADERS_DIR}/") diff --git a/src/tests/ov_helpers/ov_snippets_models/CMakeLists.txt b/src/tests/ov_helpers/ov_snippets_models/CMakeLists.txt index 69cd602bb5eab5..24f1efae26a9e8 100644 --- a/src/tests/ov_helpers/ov_snippets_models/CMakeLists.txt +++ b/src/tests/ov_helpers/ov_snippets_models/CMakeLists.txt @@ -5,8 +5,6 @@ set(TARGET_NAME ov_snippets_models) set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") -set(SNIPPETS_INCLUDES "$/include") -set(COMMON_TEST_UTILS_INCLUDES "$") ov_add_target( NAME ${TARGET_NAME} @@ -14,11 +12,8 @@ ov_add_target( ROOT ${PUBLIC_HEADERS_DIR} INCLUDES PUBLIC - ${PUBLIC_HEADERS_DIR} - ${COMMON_TEST_UTILS_INCLUDES} - PRIVATE - ${SNIPPETS_INCLUDES} - + "$" + "$" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -28,11 +23,14 @@ ov_add_target( openvino::snippets ov_lpt_models ADD_CPPLINT - DEVELOPER_PACKAGE - tests ) ov_build_target_faster(${TARGET_NAME} UNITY PCH PRIVATE "src/precomp.hpp" ) + +# install & export + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${PUBLIC_HEADERS_DIR}/") diff --git a/src/tests/test_utils/common_test_utils/CMakeLists.txt b/src/tests/test_utils/common_test_utils/CMakeLists.txt index abf36d4fa3864d..1112ccd08558af 100644 --- a/src/tests/test_utils/common_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/common_test_utils/CMakeLists.txt @@ -23,8 +23,6 @@ function(add_common_utils ADD_TARGET_NAME) EXCLUDED_SOURCE_PATHS ${TARGET_EXCLUDED_SOURCE_PATHS} ADD_CLANG_FORMAT - DEVELOPER_PACKAGE - tests LINK_LIBRARIES PUBLIC gtest @@ -38,12 +36,14 @@ function(add_common_utils ADD_TARGET_NAME) openvino::shape_inference INCLUDES PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/include" + "$" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" - ) + ov_developer_package_export_targets(TARGET ${ADD_TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") + if(ENABLE_CONFORMANCE_PGQL) target_compile_definitions(${ADD_TARGET_NAME} PUBLIC ENABLE_CONFORMANCE_PGQL) endif() @@ -58,31 +58,14 @@ function(add_common_utils ADD_TARGET_NAME) PCH PRIVATE "src/precomp.hpp" ) - # detecting regex support - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) - set(USE_BOOST_RE ON) - endif() - if (USE_BOOST_RE) - target_compile_definitions(${ADD_TARGET_NAME} PUBLIC USE_BOOST_RE) - - debug_message(STATUS "Adding boost dependency") - if (CMAKE_VERBOSE_MAKEFILE) - set(Boost_DEBUG on) - endif () - find_package(Boost REQUIRED COMPONENTS regex) - target_link_libraries(${ADD_TARGET_NAME} PUBLIC ${Boost_REGEX_LIBRARY}) - target_include_directories(${ADD_TARGET_NAME} PUBLIC ${Boost_INCLUDE_DIRS}) - endif () - target_include_directories(${ADD_TARGET_NAME} PUBLIC $ PRIVATE $) - target_include_directories(${ADD_TARGET_NAME} SYSTEM PUBLIC ${OV_TESTS_ROOT}/test_utils) + target_include_directories(${ADD_TARGET_NAME} SYSTEM PUBLIC "$") target_compile_definitions(${ADD_TARGET_NAME} PUBLIC ${ARGN}) - endfunction() # Keep old name so that library can be used from NPU repo diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp index cc45a47d779d57..0bd9f4845d481b 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp @@ -115,6 +115,10 @@ inline std::string set2str(const std::set& set) { return std::string("()"); } +inline std::string bool2str(const bool val) { + return val ? "True" : "False"; +} + template std::vector> combineParams(const std::map>& keyValueSets) { std::vector> resVec; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp index 4d2e5006eeb72d..3b876a530cdf33 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp @@ -232,9 +232,9 @@ void inline fill_random_unique_sequence(T* rawBlobDataPtr, auto value = static_cast(dist(generator)); value /= static_cast(k); if (std::is_same::value) { - elems.insert(static_cast(ov::float16(value).to_bits())); + elems.insert(static_cast(ov::float16(value))); } else if (std::is_same::value) { - elems.insert(static_cast(ov::bfloat16(value).to_bits())); + elems.insert(static_cast(ov::bfloat16(value))); } else { elems.insert(static_cast(value)); } diff --git a/src/tests/test_utils/functional_test_utils/CMakeLists.txt b/src/tests/test_utils/functional_test_utils/CMakeLists.txt index c990febcd6a0b2..e1148d82ee1132 100644 --- a/src/tests/test_utils/functional_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/functional_test_utils/CMakeLists.txt @@ -9,11 +9,9 @@ ov_add_target( TYPE STATIC ROOT ${CMAKE_CURRENT_SOURCE_DIR} ADD_CLANG_FORMAT - DEVELOPER_PACKAGE - tests INCLUDES PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/include" + "$" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -24,13 +22,17 @@ ov_add_target( PRIVATE ov_models openvino::pugixml - INCLUDES - PUBLIC - $ ) -install(DIRECTORY layer_tests_summary DESTINATION tests/functional_test_utils COMPONENT tests EXCLUDE_FROM_ALL) - ov_build_target_faster(${TARGET_NAME} PCH PRIVATE "src/precomp.hpp" ) + +# install & export + +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/layer_tests_summary" + DESTINATION tests/functional_test_utils + COMPONENT tests EXCLUDE_FROM_ALL) + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv index 84d3e26eb35fc2..07f091dd7a222b 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv @@ -1130,3 +1130,10 @@ conformance_PRelu/ReadIRTest.ImportExport/Op=PRelu.1_Type=f32_IR=20e7e74f55eb5fb conformance_RegionYolo/ReadIRTest.ImportExport/Op=RegionYolo.1_Type=f32_IR=RegionYolo-1_750_Device=CPU_Shape=static_Config=(),5.06332e-06 conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=(),9.72615e-07 conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=(),0.000113281 +conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseAnd_opset13_Device=CPU_Config=(),1 +conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseOr_opset13_Device=CPU_Config=(),1 +conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseNot_opset13_Device=CPU_Config=(),1 +conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=(),1 +conformance/OpImplCheckTest.checkPluginImplementation/Function=NMSRotated_opset13_Device=CPU_Config=(),1 +conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=(),1 +conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseXor_opset13_Device=CPU_Config=(),1 diff --git a/src/tests/test_utils/unit_test_utils/CMakeLists.txt b/src/tests/test_utils/unit_test_utils/CMakeLists.txt index 7b8607d6bae28c..dd2bb77aa52224 100644 --- a/src/tests/test_utils/unit_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/unit_test_utils/CMakeLists.txt @@ -15,14 +15,20 @@ ov_add_target( TYPE STATIC ROOT ${CMAKE_CURRENT_SOURCE_DIR} ADD_CLANG_FORMAT - DEVELOPER_PACKAGE - tests LINK_LIBRARIES PUBLIC common_test_utils_s inference_engine_s gmock + DEPENDENCIES + mock_engine INCLUDES PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/.." + "$" ) + +# install & export + +ov_developer_package_export_targets(TARGET ${TARGET_NAME} + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/../unit_test_utils" + INSTALL_DESTIONATION "developer_package/include/unit_test_utils/unit_test_utils") diff --git a/tests/layer_tests/onnx_tests/test_roi_align.py b/tests/layer_tests/onnx_tests/test_roi_align.py index 392856eaff7b2d..13663808a3acd3 100644 --- a/tests/layer_tests/onnx_tests/test_roi_align.py +++ b/tests/layer_tests/onnx_tests/test_roi_align.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest from common.layer_test_class import check_ir_version @@ -133,6 +135,7 @@ def create_net(self, input_shape, rois_shape, indices_shape, output_shape, @pytest.mark.parametrize("params", test_data) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Windows', reason="Ticket - 122731") def test_roi_alignv10(self, params, ie_device, precision, ir_version, temp_dir, use_old_api): # TODO: ticket for investigating GPU failures: CVS-86300 if ie_device != "GPU": diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index 4ce42db7fa9167..c564b1bb3731b9 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -104,31 +104,94 @@ def test_full_out(self, shape, value, dtype, with_names, ie_device, precision, i ir_version, kwargs_to_prepare_input={'value': value}) class TestFill(PytorchLayerTest): - def _prepare_input(self, value, shape, input_dtype, value_dtype): - return (np.random.randn(*shape).astype(input_dtype), np.array(value, dtype=value_dtype),) + def _prepare_input(self, value, shape, input_dtype, value_dtype, out=False): + if not out: + return (np.random.randn(*shape).astype(input_dtype), np.array(value, dtype=value_dtype),) + return (np.random.randn(*shape).astype(input_dtype), np.array(value, dtype=value_dtype), np.zeros(shape, dtype=input_dtype)) - def create_model(self): + + def create_model(self, mode): import torch class aten_fill(torch.nn.Module): + def __init__(self, mode) -> None: + super().__init__() + if mode == "inplace": + self.forward = self.forward_inplace + if mode == "out": + self.forward = self.forward_out - def forward(self, input_t: torch.Tensor, x: float): + + def forward_inplace(self, input_t: torch.Tensor, x: float): return input_t.fill_(x) + + def forward_out(self, input_t: torch.Tensor, x: float, out: torch.Tensor): + return input_t.fill(x, out=out), out + + def forward(self, input_t: torch.Tensor, x:float): + return input_t.fill(x) + ref_net = None - model = aten_fill() + model = aten_fill(mode) - return model, ref_net, "aten::fill_" + return model, ref_net, "aten::fill_" if mode == "inplace" else "aten::fill" @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.parametrize("input_dtype", ["int8", "int32", "int64", "float32", "float64"]) @pytest.mark.parametrize("value_dtype", ["int8", "int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("mode", ["", "inplace", "out"]) @pytest.mark.nightly @pytest.mark.precommit - def test_fill(self, shape, value, input_dtype, value_dtype, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'shape': shape, "input_dtype": input_dtype, "value_dtype": value_dtype}) + def test_fill(self, shape, value, input_dtype, value_dtype, mode, ie_device, precision, ir_version): + self._test(*self.create_model(mode), ie_device, precision, ir_version, + kwargs_to_prepare_input={ + 'value': value, + 'shape': shape, + "input_dtype": input_dtype, + "value_dtype": value_dtype, + "out": mode == "out" + }) + +class TestFillDiagonal(PytorchLayerTest): + def _prepare_input(self, shape, input_dtype, value, value_dtype): + return np.zeros(shape).astype(input_dtype), np.array(value, dtype=value_dtype) + + def create_model(self, shape, wrap): + import torch + + class aten_fill_diagonal(torch.nn.Module): + def __init__(self, input_shape, wrap=False) -> None: + super().__init__() + self.wrap = wrap + self.input_shape = input_shape + + def forward(self, x:torch.Tensor, y:float): + x = x.reshape(self.input_shape) + return x.fill_diagonal_(y, wrap=self.wrap), x + + ref_net = None + + model = aten_fill_diagonal(shape, wrap) + return model, "aten::fill_diagonal_", ref_net + + @pytest.mark.parametrize("shape", ([4, 4], [5, 4], [8, 4], [4, 3], [5, 5, 5], [3, 3, 3, 3], [4, 4, 4, 4, 4])) + @pytest.mark.parametrize("value", [0, 1, -1, 2.5]) + @pytest.mark.parametrize("input_dtype", ["int8", "int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("value_dtype", ["int8", "int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("wrap", [True, False]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_fill_diagonal(self, shape, value, input_dtype, value_dtype, wrap, ie_device, precision, ir_version): + self._test(*self.create_model(shape, wrap), ie_device, precision, ir_version, + kwargs_to_prepare_input={ + 'value': value, + 'shape': shape, + "input_dtype": input_dtype, + "value_dtype": value_dtype + }) + class TestZero(PytorchLayerTest): def _prepare_input(self, shape, input_dtype): diff --git a/tests/layer_tests/requirements.txt b/tests/layer_tests/requirements.txt index b1b76c54b92abe..fd15d8bf32405d 100644 --- a/tests/layer_tests/requirements.txt +++ b/tests/layer_tests/requirements.txt @@ -1,6 +1,7 @@ -c ../constraints.txt # paddlepaddle # ticket 95904 numpy +onnxruntime requests torch torchvision diff --git a/tests/layer_tests/tensorflow_tests/test_tf_MatMul.py b/tests/layer_tests/tensorflow_tests/test_tf_MatMul.py index 460afb662851e7..2a93291af28230 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_MatMul.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_MatMul.py @@ -36,7 +36,7 @@ def create_net_with_matmul_op(self, x_shape, y_shape, x_bool, y_bool, op_type, i elif op_type == 'BatchMatMulV3': op_type_to_tf[op_type](x=tf_x, y=tf_y, Tout=tf.float32, adj_x=x_bool, adj_y=y_bool, name='Operation') else: - raise RuntimeError("Undknown operation") + raise RuntimeError("Unknown operation") tf.compat.v1.global_variables_initializer() tf_net = sess.graph_def @@ -53,7 +53,7 @@ def create_net_with_matmul_op(self, x_shape, y_shape, x_bool, y_bool, op_type, i @pytest.mark.parametrize("params", test_data_precommit) @pytest.mark.parametrize("op_type", ['BatchMatMul', 'BatchMatMulV2', - #'BatchMatMulV3', #Isn't supported + 'BatchMatMulV3', 'MatMul', ]) @pytest.mark.precommit_tf_fe @@ -72,7 +72,7 @@ def test_matmul_op_precommit(self, params, ie_device, precision, ir_version, tem @pytest.mark.parametrize("params", test_data) @pytest.mark.parametrize("op_type", ['BatchMatMul', 'BatchMatMulV2', - #'BatchMatMulV3', #Isn't supported + 'BatchMatMulV3', 'MatMul', ]) @pytest.mark.parametrize("x_bool", [ diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py new file mode 100644 index 00000000000000..7c80fbdad88b09 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py @@ -0,0 +1,49 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestXlog1py(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = inputs_info['y'] + inputs_data = {} + # x = [-3 ,3] y = [1, 2] + # generate x in way to have zeros + inputs_data['x'] = (6 * np.random.random(size=x_shape).astype(np.float32) - 3) * \ + np.random.randint(2, size=x_shape).astype(np.float32) + inputs_data['y'] = np.random.random(size=y_shape).astype(np.float32) + 1 + return inputs_data + + def create_xlog1py_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.Xlog1py(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[2, 3, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_xlog1py_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_xlog1py_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py new file mode 100644 index 00000000000000..6ecddeb439aed3 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py @@ -0,0 +1,49 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestXlogy(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + assert 'y' in inputs_info + x_shape = inputs_info['x'] + y_shape = inputs_info['y'] + inputs_data = {} + # x = [-3 ,3] y = [1, 2] + # generate x in way to have zeros + inputs_data['x'] = (6 * np.random.random(size=x_shape).astype(np.float32) - 3) * \ + np.random.randint(2, size=x_shape).astype(np.float32) + inputs_data['y'] = np.random.random(size=y_shape).astype(np.float32) + 1 + return inputs_data + + def create_xlogy_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.Xlogy(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[2, 3, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_xlogy_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_xlogy_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models index 112aedeb60de0c..0618d98a4d9f31 100644 --- a/tests/model_hub_tests/torch_tests/hf_transformers_models +++ b/tests/model_hub_tests/torch_tests/hf_transformers_models @@ -242,7 +242,7 @@ microsoft/deberta-base,deberta microsoft/git-large-coco,git,skip,Load problem microsoft/layoutlm-base-uncased,layoutlm microsoft/layoutlmv2-base-uncased,layoutlmv2,skip,Load problem -microsoft/layoutlmv3-base,layoutlmv3,xfail,Unsupported op aten::amax aten::clip +microsoft/layoutlmv3-base,layoutlmv3 microsoft/markuplm-base,markuplm microsoft/resnet-50,resnet microsoft/speecht5_hifigan,hifigan,skip,Load problem @@ -251,7 +251,7 @@ microsoft/swinv2-tiny-patch4-window8-256,swinv2,xfail,Unsupported op aten::adapt microsoft/table-transformer-detection,table-transformer microsoft/wavlm-large,wavlm,skip,Load problem microsoft/xclip-base-patch32,xclip,skip,Load problem -microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet,xfail,Unsupported op aten::fill_diagonal_ +microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet miguelvictor/python-fromzero-lstmlm,lstmlm,skip,Load problem mingzi151/test-hf-wav2vec2bert,wav2vec2bert,skip,Load problem MIT/ast-finetuned-audioset-10-10-0.4593,audio-spectrogram-transformer,skip,Load problem @@ -348,7 +348,7 @@ SteveZhan/my-resnet50d,resnet_steve,skip,Load problem suno/bark,bark,skip,Load problem surajnair/r3m-50,r3m,skip,Load problem susnato/clvp_dev,clvp,skip,Load problem -Tanrei/GPTSAN-japanese,gptsan-japanese,xfail,Unsupported op aten::clip aten::index_put_ prim::TupleConstruct +Tanrei/GPTSAN-japanese,gptsan-japanese,xfail,Unsupported op aten::index_put_ prim::TupleConstruct tau/bart-large-sled-govreport,tau/sled,skip,Load problem taufeeque/best-cb-model,codebook,skip,Load problem Team-PIXEL/pixel-base,pixel,skip,Load problem @@ -357,7 +357,7 @@ thomwolf/vqgan_imagenet_f16_1024,vqgan_model,skip,Load problem thu-ml/zh-clip-vit-roberta-large-patch14,zhclip,skip,Load problem tifa-benchmark/promptcap-coco-vqa,ofa,skip,Load problem tli8hf/robertabase_snli,transformerfornli,skip,Load problem -transfo-xl-wt103,transfo-xl,xfail,Unsupported op aten::clamp_ aten::index_copy_ +transfo-xl-wt103,transfo-xl,xfail,Unsupported op aten::index_copy_ transZ/BART_shared_clean,shared_bart,skip,Load problem transZ/BART_shared_v2,shared_bart_v2,skip,Load problem transZ/misecom,misecom,skip,Load problem diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 3a677353c86508..184e725a04f9b9 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -298,7 +298,10 @@ def teardown_method(self): ("google/tapas-large-finetuned-wtq", "tapas"), ("gpt2", "gpt2"), ("openai/clip-vit-large-patch14", "clip"), - ("RWKV/rwkv-4-169m-pile", "rwkv")]) + ("RWKV/rwkv-4-169m-pile", "rwkv"), + ("microsoft/layoutlmv3-base", "layoutlmv3"), + ("microsoft/xprophetnet-large-wiki100-cased", "xlm-prophetnet"), + ]) @pytest.mark.precommit def test_convert_model_precommit(self, name, type, ie_device): self.run(model_name=name, model_link=type, ie_device=ie_device) diff --git a/tests/model_hub_tests/torch_tests/test_timm.py b/tests/model_hub_tests/torch_tests/test_timm.py index d08f6a8c4a9a40..d6dd438df5c6c6 100644 --- a/tests/model_hub_tests/torch_tests/test_timm.py +++ b/tests/model_hub_tests/torch_tests/test_timm.py @@ -10,9 +10,28 @@ from openvino import convert_model +def filter_timm(timm_list: list) -> list: + unique_models = set() + filtered_list = [] + ignore_set = {"base", "mini", "small", "xxtiny", "xtiny", "tiny", "lite", "nano", "pico", "medium", "big", + "large", "xlarge", "xxlarge", "huge", "gigantic", "giant", "enormous", "xs", "xxs", "s", "m", "l", "xl"} + for name in timm_list: + # first: remove datasets + name_parts = name.split(".") + _name = "_".join(name.split(".")[:-1]) if len(name_parts) > 1 else name + # second: remove sizes + name_set = set([n for n in _name.split("_") if not n.isnumeric()]) + name_set = name_set.difference(ignore_set) + name_join = "_".join(name_set) + if name_join not in unique_models: + unique_models.add(name_join) + filtered_list.append(name) + return filtered_list + + def get_all_models() -> list: m_list = timm.list_pretrained() - return m_list + return filter_timm(m_list) # To make tests reproducible we seed the random generator @@ -61,6 +80,7 @@ def teardown_method(self): def test_convert_model_precommit(self, name, ie_device): self.run(name, None, ie_device) + @pytest.mark.nightly @pytest.mark.parametrize("name", get_all_models()) def test_convert_model_all_models(self, name, ie_device): self.run(name, None, ie_device) diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index 8ef6c5bf1c51ea..fac4752c318250 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -51,9 +51,6 @@ if(X86_64 OR X86 OR UNIVERSAL2) # conan creates alias xbyak::xbyak, no extra steps are required else() add_subdirectory(thirdparty/xbyak EXCLUDE_FROM_ALL) - # export and install xbyak - openvino_developer_export_targets(COMPONENT openvino_common TARGETS xbyak::xbyak) - ov_install_static_lib(xbyak ${OV_CPACK_COMP_CORE}) endif() endif() @@ -269,12 +266,13 @@ if(NOT TARGET openvino::pugixml) function(ov_build_pugixml) function(ov_build_pugixml_static) set(BUILD_SHARED_LIBS OFF) + set(PUGIXML_INSTALL OFF CACHE BOOL "" FORCE) add_subdirectory(thirdparty/pugixml EXCLUDE_FROM_ALL) endfunction() ov_build_pugixml_static() set_property(TARGET pugixml-static PROPERTY EXPORT_NAME pugixml) add_library(openvino::pugixml ALIAS pugixml-static) - openvino_developer_export_targets(COMPONENT openvino_common TARGETS openvino::pugixml) + ov_developer_package_export_targets(TARGET openvino::pugixml) ov_install_static_lib(pugixml-static ${OV_CPACK_COMP_CORE}) endfunction() @@ -300,7 +298,7 @@ if(ENABLE_GAPI_PREPROCESSING) add_subdirectory(thirdparty/ade EXCLUDE_FROM_ALL) set_target_properties(ade PROPERTIES FOLDER thirdparty) - openvino_developer_export_targets(COMPONENT openvino_common TARGETS ade) + ov_developer_package_export_targets(TARGET ade) ov_install_static_lib(ade ${OV_CPACK_COMP_CORE}) endif() @@ -316,7 +314,7 @@ if(ENABLE_GAPI_PREPROCESSING) endif() set_target_properties(fluid PROPERTIES FOLDER thirdparty) - openvino_developer_export_targets(COMPONENT openvino_common TARGETS fluid) + ov_developer_package_export_targets(TARGET fluid) ov_install_static_lib(fluid ${OV_CPACK_COMP_CORE}) endif() @@ -369,7 +367,7 @@ if(ENABLE_SAMPLES OR ENABLE_TESTS) if(NOT TARGET gflags) add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL) - openvino_developer_export_targets(COMPONENT openvino_common TARGETS gflags) + ov_developer_package_export_targets(TARGET gflags) endif() endif() @@ -391,8 +389,14 @@ if(ENABLE_TESTS) endforeach() else() add_subdirectory(thirdparty/gtest EXCLUDE_FROM_ALL) - openvino_developer_export_targets(COMPONENT tests - TARGETS gmock gmock_main gtest gtest_main) + # install & export + set(googletest_root "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gtest/gtest") + ov_developer_package_export_targets(TARGET gtest_main + INSTALL_INCLUDE_DIRECTORIES "${googletest_root}/googletest/include/") + ov_developer_package_export_targets(TARGET gtest + INSTALL_INCLUDE_DIRECTORIES "${googletest_root}/googletest/include/") + ov_developer_package_export_targets(TARGET gmock + INSTALL_INCLUDE_DIRECTORIES "${googletest_root}/googlemock/include/") endif() endif() @@ -585,8 +589,9 @@ if(ENABLE_SAMPLES) else() add_subdirectory(thirdparty/json EXCLUDE_FROM_ALL) - # this is required only because of NPU plugin reused this - openvino_developer_export_targets(COMPONENT openvino_common TARGETS nlohmann_json) + # this is required only because of NPU plugin reused this: export & install + ov_developer_package_export_targets(TARGET nlohmann_json + INSTALL_INCLUDE_DIRECTORIES "${OpenVINO_SOURCE_DIR}/thirdparty/json/nlohmann_json/include") # for nlohmann library versions older than v3.0.0 if(NOT TARGET nlohmann_json::nlohmann_json) diff --git a/thirdparty/gtest/CMakeLists.txt b/thirdparty/gtest/CMakeLists.txt index f527552903c1d7..585b80934bc4ce 100644 --- a/thirdparty/gtest/CMakeLists.txt +++ b/thirdparty/gtest/CMakeLists.txt @@ -14,16 +14,31 @@ set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) add_subdirectory(gtest EXCLUDE_FROM_ALL) -get_target_property(gtest_include_dirs gtest INTERFACE_INCLUDE_DIRECTORIES) -set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${gtest_include_dirs}") +function(_ov_gtest_filter_install_interface TARGET TYPE) + set(final_include_dirs "$") + + get_target_property(include_dirs ${TARGET} INTERFACE_INCLUDE_DIRECTORIES) + foreach(include_dir IN LISTS include_dirs) + if(NOT include_dir MATCHES ".*INSTALL_INTERFACE.*") + # remove leading and trailing parts of generator expressions + string(REPLACE "$" "" include_dir "${include_dir}") + # wrap to BUILD_INTERFACE again + list(APPEND final_include_dirs "$") + endif() + endforeach() -get_target_property(gmock_include_dirs gtest INTERFACE_INCLUDE_DIRECTORIES) -set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES - "${gmock_include_dirs};${gmock_SOURCE_DIR}/include") + set_target_properties(${TARGET} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${final_include_dirs}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${final_include_dirs}") +endfunction() -set(gtest_targets gtest gtest_main gmock gmock_main) +_ov_gtest_filter_install_interface(gtest gtest) +_ov_gtest_filter_install_interface(gtest_main gtest) +_ov_gtest_filter_install_interface(gmock gmock) +_ov_gtest_filter_install_interface(gmock_main gmock) -foreach(target IN LISTS gtest_targets) +foreach(target gtest gtest_main gmock gmock_main) # If we have specified /Z7 option, remove -Zi option which comes from gtest if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") get_target_property(_target_cxx_flags ${target} COMPILE_OPTIONS) @@ -41,8 +56,7 @@ foreach(target IN LISTS gtest_targets) target_compile_options(${target} PRIVATE -Wno-deprecated-copy) endif() endif() + # disable warnings ov_disable_all_warnings(${target}) + set_target_properties(${target} PROPERTIES FOLDER thirdparty) endforeach() - -set_target_properties(${gtest_targets} - PROPERTIES FOLDER thirdparty) diff --git a/thirdparty/ittapi/CMakeLists.txt b/thirdparty/ittapi/CMakeLists.txt index fe821f74feaaac..14e18f8bbced68 100644 --- a/thirdparty/ittapi/CMakeLists.txt +++ b/thirdparty/ittapi/CMakeLists.txt @@ -4,8 +4,8 @@ if(DEFINED INTEL_VTUNE_DIR OR DEFINED ENV{INTEL_VTUNE_DIR}) find_package(ITT - PATHS "${OpenVINO_SOURCE_DIR}/src/common/itt/cmake" - NO_DEFAULT_PATH) + PATHS "${OpenVINO_SOURCE_DIR}/src/common/itt/cmake" + NO_DEFAULT_PATH) if(NOT ITT_FOUND) message(WARNING "Profiling option enabled, but no ITT library was found under INTEL_VTUNE_DIR") endif() @@ -25,6 +25,6 @@ else() # create alias ittapi::ittnotify add_library(ittapi::ittnotify ALIAS ittnotify) - openvino_developer_export_targets(COMPONENT openvino_common TARGETS ittapi::ittnotify) + ov_developer_package_export_targets(TARGET ittapi::ittnotify) ov_install_static_lib(ittnotify ${OV_CPACK_COMP_CORE}) endif() diff --git a/thirdparty/pugixml b/thirdparty/pugixml index a0e064336317c9..2e357d19a3228c 160000 --- a/thirdparty/pugixml +++ b/thirdparty/pugixml @@ -1 +1 @@ -Subproject commit a0e064336317c9347a91224112af9933598714e9 +Subproject commit 2e357d19a3228c0a301727aac6bea6fecd982d21 diff --git a/tools/mo/automation/package_BOM.txt b/tools/mo/automation/package_BOM.txt index 0780ce7eba8151..b9bc64d1c8bf08 100644 --- a/tools/mo/automation/package_BOM.txt +++ b/tools/mo/automation/package_BOM.txt @@ -928,6 +928,7 @@ openvino/tools/mo/ops/MatMul.py openvino/tools/mo/ops/memory.py openvino/tools/mo/ops/memoryoffset.py openvino/tools/mo/ops/merge.py +openvino/tools/mo/ops/multinomial.py openvino/tools/mo/ops/mvn.py openvino/tools/mo/ops/mxfft.py openvino/tools/mo/ops/mxrepeat.py @@ -1106,4 +1107,4 @@ openvino/tools/mo/utils/tensorboard_util.py openvino/tools/mo/utils/type_utils.py openvino/tools/mo/utils/unsupported_ops.py openvino/tools/mo/utils/utils.py -openvino/tools/mo/utils/version.py \ No newline at end of file +openvino/tools/mo/utils/version.py diff --git a/tools/mo/openvino/tools/mo/ops/Cast.py b/tools/mo/openvino/tools/mo/ops/Cast.py index 77beb07c74122e..24409912429f07 100644 --- a/tools/mo/openvino/tools/mo/ops/Cast.py +++ b/tools/mo/openvino/tools/mo/ops/Cast.py @@ -36,12 +36,14 @@ def backend_attrs(self): @staticmethod def type_infer(node: Node): - assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier' + assert node.has_valid( + 'dst_type'), 'Destination type of "Cast" operation should be extracted earlier' node.out_port(0).set_data_type(node.dst_type) @staticmethod def helper_value_propagation(node_name, value, dst_type): - new_blob, finite_match_count, zero_match_count = convert_blob(value, dst_type) + new_blob, finite_match_count, zero_match_count = convert_blob( + value, dst_type) if finite_match_count: log.error("{} elements of {} were clipped to infinity while converting an input blob for node '{}' to {}." @@ -63,6 +65,10 @@ def custom_type_casting_and_packing(node: Node, value, dst_type): we would pad them to 6 element with the last element as zero and we would pack them into 3 uint8 values """ assert dst_type in [packed_U4, packed_I4] + # TODO: Remove this comment when it's clear that we can fix it easily + # raise Exception("Packing of u4/i4 data is no longer supported in mo because it is now incompatible with the new " + # "order of the halfs of a byte that was introduced in OpenVINO runtime recently. Use ovc " + # "command line tool or openvino.convert_model python function instead.") minimum_regular_dtype = np.uint8 if dst_type == packed_U4 else np.int8 # initial casing from the source type to the numpy-friendly type which could absorb all the values of dst_type @@ -83,10 +89,12 @@ def custom_type_casting_and_packing(node: Node, value, dst_type): padded = np.concatenate((flattened, np.zeros([pad], dtype=minimum_regular_dtype))) assert np.prod(padded.shape) % num_values_fitting_into_uint8 == 0 - bit_order_little = (padded[:, None] & (1 << np.arange(num_bits)) > 0).astype(np.uint8) - bit_order_big = np.flip(bit_order_little, axis=1) - bit_order_big_flattened = bit_order_big.flatten() - packed = np.packbits(bit_order_big_flattened) + bit_order_little = (padded[:, None] & ( + 1 << np.arange(num_bits)) > 0).astype(np.uint8) + bit_order_big_flattened = bit_order_little.flatten() + # u1 still has reversed bit order: + packed = np.packbits(bit_order_big_flattened, + bitorder='little' if num_bits > 1 else 'big') node.out_node(0)['force_shape'] = data_shape.copy() node.out_node(0)['force_type'] = np_data_type_to_precision(dst_type) diff --git a/tools/mo/openvino/tools/mo/ops/multinomial.py b/tools/mo/openvino/tools/mo/ops/multinomial.py new file mode 100644 index 00000000000000..42f4b0d3eedbb9 --- /dev/null +++ b/tools/mo/openvino/tools/mo/ops/multinomial.py @@ -0,0 +1,69 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np + +from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension +from openvino.tools.mo.front.extractor import bool_to_str +from openvino.tools.mo.graph.graph import Graph, Node + +from openvino.tools.mo.ops.op import Op + + +class Multinomial(Op): + op = 'Multinomial' + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { + 'type': self.op, + 'op': self.op, + 'version': 'opset13', + 'infer': self.infer, + 'in_ports_count': 2, + 'out_ports_count': 1, + 'type_infer': self.type_infer, + 'with_replacement': False, + 'log_probs': False, + 'global_seed': 0, + 'op_seed': 0, + 'convert_type': np.int64, + }, attrs) + + def backend_attrs(self): + return ['convert_type', + ('with_replacement', lambda node: bool_to_str( + node, 'with_replacement')), + ('log_probs', lambda node: bool_to_str(node, 'log_probs')), + 'global_seed', + 'op_seed'] + + def supported_attrs(self): + return ['convert_type', + 'with_replacement', + 'log_probs', + 'global_seed', + 'op_seed'] + + @staticmethod + def type_infer(node: Node): + assert node.has_valid('convert_type') + if node['convert_type'] == 'i32': + node.out_port(0).set_data_type(np.int32) + else: + node.out_port(0).set_data_type(np.int64) + + @staticmethod + def infer(node: Node): + + input_shape = node.in_node(0).shape + output_shape = [] + if input_shape is not None and input_shape.size == 2: + output_shape.append(input_shape[0]) + + num_samples = node.in_port(1).data.get_value() + if num_samples is not None: + output_shape.append(num_samples) + else: + output_shape.append(dynamic_dimension) + node.out_port(0).data.set_shape(output_shape) diff --git a/tools/mo/unit_tests/mo/ops/cast_test.py b/tools/mo/unit_tests/mo/ops/cast_test.py index 985a7276514235..73a468e9fb80fa 100644 --- a/tools/mo/unit_tests/mo/ops/cast_test.py +++ b/tools/mo/unit_tests/mo/ops/cast_test.py @@ -21,81 +21,20 @@ class TestCastTest(): """ Example of checking: - 7 == 0111, padded to 0111 0000, results in 112 - 7 == 0111, 8 == 1000 packed to 0111 1000, results in 120 + 7 == 0111, padded to 00000111, results in 7 + 7 == 0111, 8 == 1000 packed to 10000111, results in 7+16 - -8 == 1000, padded to 1000 0000, results in 128 + -8 == 1000, padded to 00001000, results in 8 """ - @pytest.mark.parametrize("value, expected, custom_dtype",[ - ([0], [0], packed_U4), - ([1], [16], packed_U4), - ([2], [32], packed_U4), - ([3], [48], packed_U4), - ([4], [64], packed_U4), - ([5], [80], packed_U4), - ([6], [96], packed_U4), - ([7], [112], packed_U4), - ([8], [128], packed_U4), - ([9], [144], packed_U4), - ([10], [160], packed_U4), - ([11], [176], packed_U4), - ([12], [192], packed_U4), - ([13], [208], packed_U4), - ([14], [224], packed_U4), - ([15], [240], packed_U4), - - ([0, 15], [15], packed_U4), - ([1, 14], [30], packed_U4), - ([2, 13], [45], packed_U4), - ([3, 12], [60], packed_U4), - ([4, 11], [75], packed_U4), - ([5, 10], [90], packed_U4), - ([6, 9], [105], packed_U4), - ([7, 8], [120], packed_U4), - ([8, 7], [135], packed_U4), - ([9, 6], [150], packed_U4), - ([10, 5], [165], packed_U4), - ([11, 4], [180], packed_U4), - ([12, 3], [195], packed_U4), - ([13, 2], [210], packed_U4), - ([14, 1], [225], packed_U4), - ([15, 0], [240], packed_U4), - - ([-8], [128], packed_I4), - ([-7], [144], packed_I4), - ([-6], [160], packed_I4), - ([-5], [176], packed_I4), - ([-4], [192], packed_I4), - ([-3], [208], packed_I4), - ([-2], [224], packed_I4), - ([-1], [240], packed_I4), - ([0], [0], packed_I4), - ([1], [16], packed_I4), - ([2], [32], packed_I4), - ([3], [48], packed_I4), - ([4], [64], packed_I4), - ([5], [80], packed_I4), - ([6], [96], packed_I4), - ([7], [112], packed_I4), - - ([-8, 7], [135], packed_I4), - ([-7, 6], [150], packed_I4), - ([-6, 5], [165], packed_I4), - ([-5, 4], [180], packed_I4), - ([-4, 3], [195], packed_I4), - ([-3, 2], [210], packed_I4), - ([-2, 1], [225], packed_I4), - ([-1, 0], [240], packed_I4), - ([0, -1], [15], packed_I4), - ([1, -2], [30], packed_I4), - ([2, -3], [45], packed_I4), - ([3, -4], [60], packed_I4), - ([4, -5], [75], packed_I4), - ([5, -6], [90], packed_I4), - ([6, -7], [105], packed_I4), - ([7, -8], [120], packed_I4), - ]) + @pytest.mark.parametrize("value, expected, custom_dtype", + [([i], [i], packed_U4) for i in range(16)] + + [([i, 15-i], [i + (15-i)*16], packed_U4) for i in range(16)] + + [([-i], [16-i], packed_I4) for i in range(1, 8+1)] + + [([i], [i], packed_I4) for i in range(8)] + + [([-i-1, i], [16-i-1 + 16*i], packed_I4) for i in range(8)] + + [([i, -i-1], [i + 16*(16-i-1)], packed_I4) for i in range(8)] + ) def test_custom_value_propagation(self, value, expected, custom_dtype): graph = build_graph(nodes(value, custom_dtype), [ *connect('value', 'convert'), *connect('convert', 'output'), diff --git a/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py b/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py index 62cd013ad23093..3e5b35ef62fabb 100644 --- a/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py +++ b/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py @@ -10,7 +10,7 @@ import openvino.runtime.opset12 as opset12 import openvino.runtime.opset11 as opset11 import openvino.runtime.opset10 as opset10 -from openvino.runtime import Model, serialize, Core, PartialShape, Dimension +from openvino.runtime import Model, serialize, Core, PartialShape, Dimension, Type from openvino.tools.mo.utils.ir_reader.restore_graph import restore_graph_from_ir, save_restored_graph from openvino.tools.mo.utils.logger import init_logger @@ -23,16 +23,22 @@ class TestOps(unittest.TestCase): @staticmethod def check_graph_can_save(model, name): with tempfile.TemporaryDirectory() as tmp: - model_xml = Path(tmp) / (name + '.xml') - model_bin = Path(tmp) / (name + '.bin') + tmp_path = Path(tmp) + model_xml = tmp_path / (name + '.xml') + model_bin = tmp_path / (name + '.bin') serialize(model, model_xml, model_bin) graph, _ = restore_graph_from_ir(model_xml, model_bin) - save_restored_graph(graph, tmp, {}, name) + save_restored_graph(graph, tmp, {}, name + '_restored') # restore 2 times to validate that after save graph doesn't lose attributes etc. - graph, _ = restore_graph_from_ir(model_xml, model_bin) + restored_model_xml = tmp_path / (name + '_restored.xml') + restored_model_bin = tmp_path / (name + '_restored.bin') + graph, _ = restore_graph_from_ir( + restored_model_xml, restored_model_bin) + core = Core() + core.set_property({"ENABLE_MMAP": False}) # check that re-saved model can be read in runtime - Core().read_model(model_xml) - return graph + model = core.read_model(restored_model_xml) + return graph, model def test_topk_11(self): data_shape = [6, 12, 10, 24] @@ -43,7 +49,7 @@ def test_topk_11(self): topk = opset11.topk(data_parameter, k_val, axis, "max", "value", stable=True, name="TopK_11") model = Model(topk, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'topk_model') + graph, _ = TestOps.check_graph_can_save(model, 'topk_model') topk_node = graph.get_op_nodes(op="TopK")[0] self.assertEqual(topk_node["version"], "opset11") self.assertTrue(topk_node["stable"]) @@ -56,7 +62,7 @@ def test_interpolate_11(self): interpolate = opset11.interpolate(data_parameter, np.int32( [20, 48]), "nearest", "sizes", axes=np.int32([2, 3]), name="Interpolate_11") model = Model(interpolate, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'interpolate_model') + graph, _ = TestOps.check_graph_can_save(model, 'interpolate_model') interpolate_node = graph.get_op_nodes(op="Interpolate")[0] self.assertEqual(interpolate_node["version"], "opset11") self.assertTrue("force_precision_in_ports" in interpolate_node) @@ -69,7 +75,7 @@ def test_interpolate_11_scales(self): interpolate = opset11.interpolate(data_parameter, np.float32( [2., 2.]), "nearest", "scales", axes=np.int32([2, 3]), name="Interpolate_11") model = Model(interpolate, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'interpolate_model') + graph, _ = TestOps.check_graph_can_save(model, 'interpolate_model') interpolate_node = graph.get_op_nodes(op="Interpolate")[0] self.assertEqual(interpolate_node["version"], "opset11") self.assertTrue("force_precision_in_ports" not in interpolate_node) @@ -81,7 +87,7 @@ def test_interpolate_11_no_axes(self): interpolate = opset11.interpolate(data_parameter, np.int32( [6, 12, 20, 48]), "nearest", "sizes", name="Interpolate_11") model = Model(interpolate, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'interpolate_model') + graph, _ = TestOps.check_graph_can_save(model, 'interpolate_model') interpolate_node = graph.get_op_nodes(op="Interpolate")[0] self.assertEqual(interpolate_node["version"], "opset11") self.assertTrue("force_precision_in_ports" in interpolate_node) @@ -94,7 +100,7 @@ def test_interpolate_4(self): interpolate = opset10.interpolate(data_parameter, np.int32([20, 48]), np.float32( [2, 2]), "nearest", "sizes", axes=np.int32([2, 3]), name="Interpolate_4") model = Model(interpolate, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'interpolate4_model') + graph, _ = TestOps.check_graph_can_save(model, 'interpolate4_model') interpolate_node = graph.get_op_nodes(op="Interpolate")[0] self.assertEqual(interpolate_node["version"], "opset4") @@ -105,7 +111,7 @@ def test_unique(self): unique = opset10.unique(data_parameter, axis=np.int32( [2]), sorted=True, name="Unique_10") model = Model(unique, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'unique_model') + graph, _ = TestOps.check_graph_can_save(model, 'unique_model') unique_node = graph.get_op_nodes(op="Unique")[0] self.assertEqual(unique_node["version"], "opset10") self.assertListEqual(unique_node.out_port( @@ -118,7 +124,7 @@ def test_is_finite(self): data_shape, name="Data", dtype=np.float32) is_finite = opset10.is_finite(data_parameter, name="Is_finite_10") model = Model(is_finite, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'is_finite_model') + graph, _ = TestOps.check_graph_can_save(model, 'is_finite_model') is_finite_node = graph.get_op_nodes(op="IsFinite")[0] self.assertEqual(is_finite_node["version"], "opset10") @@ -128,7 +134,7 @@ def test_is_inf(self): data_shape, name="Data", dtype=np.float32) is_inf = opset10.is_inf(data_parameter, name="Is_inf_10") model = Model(is_inf, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'is_inf_model') + graph, _ = TestOps.check_graph_can_save(model, 'is_inf_model') is_inf_node = graph.get_op_nodes(op="IsInf")[0] self.assertEqual(is_inf_node["version"], "opset10") @@ -138,7 +144,7 @@ def test_is_nan(self): data_shape, name="Data", dtype=np.float32) is_nan = opset10.is_nan(data_parameter, name="Is_nan_10") model = Model(is_nan, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'is_nan_model') + graph, _ = TestOps.check_graph_can_save(model, 'is_nan_model') is_nan_node = graph.get_op_nodes(op="IsNaN")[0] self.assertEqual(is_nan_node["version"], "opset10") @@ -177,7 +183,7 @@ def test_if(self): out2 = if_node.set_output(then_body_res_2, else_body_res_2) model = Model([out1, out2], [parameter_x, parameter_y]) - graph = TestOps.check_graph_can_save(model, 'if_model') + graph, _ = TestOps.check_graph_can_save(model, 'if_model') if_node = graph.get_op_nodes(op="If")[0] self.assertEqual(if_node["version"], "opset8") _, layer_info, _ = if_node['IE'][0] @@ -192,7 +198,7 @@ def test_strided_slice_no_begin_end_mask(self): strided_slice = opset11.strided_slice(data_parameter, np.int32([1, 2, 3, 4]), np.int32( [3, 6, 9, 12]), np.int32([1, 1, 1, 1]), begin_mask=[], end_mask=[], name="StridedSlice_10") model = Model(strided_slice, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'strided_slice_model') + graph, _ = TestOps.check_graph_can_save(model, 'strided_slice_model') strided_slice_node = graph.get_op_nodes(op="StridedSlice")[0] self.assertEqual(strided_slice_node["version"], "opset1") @@ -206,7 +212,7 @@ def test_scatter_dynamic_shape(self): mul = opset11.multiply(scatter, np.int64([1, 2])) reshape = opset11.reshape(data_parameter, mul, True) model = Model(reshape, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'scatter_dynamic_model') + graph, _ = TestOps.check_graph_can_save(model, 'scatter_dynamic_model') scatter_update_node = graph.get_op_nodes(op="ScatterUpdate")[0] self.assertListEqual(scatter_update_node.out_port(0).data.get_value().tolist(), [0, None]) @@ -214,7 +220,7 @@ def test_pad_12(self): data_parameter = opset12.parameter([6, 12, 10, 24], name="Data", dtype=np.float32) pad = opset12.pad(data_parameter, np.int64([0, 0, -1, -2]), np.int64([0, 0, -3, -4]), "constant") model = Model(pad, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'pad_model') + graph, _ = TestOps.check_graph_can_save(model, 'pad_model') pad_node = graph.get_op_nodes(op="Pad")[0] self.assertEqual(pad_node["version"], "opset12") self.assertListEqual(pad_node.in_port(1).data.get_value().tolist(), [0, 0, -1, -2]) @@ -225,7 +231,7 @@ def test_scatter_elements_update_12(self): data_parameter = opset12.parameter([10], name="Data", dtype=np.float32) scatter = opset12.scatter_elements_update(data_parameter, np.int32([5, 0, 7, 5]), np.float32([5., 6., 1.5, -5.]), np.int32(0), "sum", False) model = Model(scatter, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'scatter_model') + graph, _ = TestOps.check_graph_can_save(model, 'scatter_model') scatter_node = graph.get_op_nodes(op="ScatterElementsUpdate")[0] self.assertListEqual(scatter_node.out_port(0).data.get_shape().tolist(), [10]) self.assertEqual(scatter_node["version"], "opset12") @@ -240,7 +246,7 @@ def test_group_norm_12(self): epsilon = 1e-6 node = opset12.group_normalization(data_parameter, scale, bias, num_groups, epsilon) model = Model(node, [data_parameter]) - graph = TestOps.check_graph_can_save(model, 'group_norm_model') + graph, _ = TestOps.check_graph_can_save(model, 'group_norm_model') gn_node = graph.get_op_nodes(op="GroupNormalization")[0] self.assertListEqual(gn_node.out_port(0).data.get_shape().tolist(), [1, 3, 3, 3]) self.assertEqual(gn_node["version"], "opset12") @@ -253,7 +259,7 @@ def test_bitwise_and_13(self): op = opset13.bitwise_and(a, b) model = Model(op, [a, b]) - graph = TestOps.check_graph_can_save(model, "bitwise_and_model") + graph, _ = TestOps.check_graph_can_save(model, "bitwise_and_model") op_node = graph.get_op_nodes(op="BitwiseAnd")[0] self.assertListEqual(op_node.out_port(0).data.get_shape().tolist(), [4, 2]) self.assertEqual(op_node["version"], "opset13") @@ -265,7 +271,7 @@ def test_bitwise_or_13(self): op = opset13.bitwise_or(a, b) model = Model(op, [a, b]) - graph = TestOps.check_graph_can_save(model, "bitwise_or_model") + graph, _ = TestOps.check_graph_can_save(model, "bitwise_or_model") op_node = graph.get_op_nodes(op="BitwiseOr")[0] self.assertListEqual(op_node.out_port(0).data.get_shape().tolist(), [4, 2]) self.assertEqual(op_node["version"], "opset13") @@ -277,7 +283,7 @@ def test_bitwise_xor_13(self): op = opset13.bitwise_xor(a, b) model = Model(op, [a, b]) - graph = TestOps.check_graph_can_save(model, "bitwise_xor_model") + graph, _ = TestOps.check_graph_can_save(model, "bitwise_xor_model") op_node = graph.get_op_nodes(op="BitwiseXor")[0] self.assertListEqual(op_node.out_port(0).data.get_shape().tolist(), [4, 2]) self.assertEqual(op_node["version"], "opset13") @@ -288,7 +294,66 @@ def test_bitwise_not_13(self): op = opset13.bitwise_not(a) model = Model(op, [a]) - graph = TestOps.check_graph_can_save(model, "bitwise_not_model") + graph, _ = TestOps.check_graph_can_save(model, "bitwise_not_model") op_node = graph.get_op_nodes(op="BitwiseNot")[0] self.assertListEqual(op_node.out_port(0).data.get_shape().tolist(), [4, 2]) self.assertEqual(op_node["version"], "opset13") + + def test_multinomial_13_param_inputs(self): + data_shape = [2, 8] + probs = opset13.parameter( + data_shape, name="probs", dtype=np.float32) + num_samples = opset13.parameter( + [1], name="num_samples", dtype=np.int32) + + op = opset13.multinomial(probs, num_samples, + convert_type="i32", + with_replacement=True, + log_probs=True, + global_seed=456, + op_seed=213) + + model = Model(op, [probs, num_samples]) + graph, loaded_model = TestOps.check_graph_can_save( + model, 'multinomial_param_model') + graph_node = graph.get_op_nodes(op="Multinomial")[0] + + self.assertEqual(graph_node["version"], "opset13") + self.assertListEqual(graph_node.out_port( + 0).data.get_shape().tolist(), [2, None]) + self.assertEqual(graph_node["convert_type"], "i32") + self.assertTrue(graph_node["with_replacement"]) + self.assertTrue(graph_node["log_probs"]) + self.assertEqual(graph_node["global_seed"], 456) + self.assertEqual(graph_node["op_seed"], 213) + self.assertEqual(loaded_model.get_output_element_type(0), Type.i32) + self.assertEqual(loaded_model.get_output_partial_shape( + 0), PartialShape([2, -1])) + + def test_multinomial_13_const_inputs(self): + probs = opset13.constant( + [[0.4, 0.5, 0.1], [0.3, 0.2, 0.5]], name="probs", dtype=np.float32) + num_samples = opset13.constant( + [3], name="num_samples", dtype=np.int64) + + op = opset13.multinomial(probs, num_samples, + convert_type="i64", + with_replacement=False, + log_probs=False) + + model = Model(op, []) + graph, loaded_model = TestOps.check_graph_can_save( + model, 'multinomial_const_model') + graph_node = graph.get_op_nodes(op="Multinomial")[0] + + self.assertEqual(graph_node["version"], "opset13") + self.assertListEqual(graph_node.out_port( + 0).data.get_shape().tolist(), [2, 3]) + self.assertEqual(graph_node["convert_type"], "i64") + self.assertFalse(graph_node["with_replacement"]) + self.assertFalse(graph_node["log_probs"]) + self.assertEqual(graph_node["global_seed"], 0) + self.assertEqual(graph_node["op_seed"], 0) + self.assertEqual(loaded_model.get_output_element_type(0), Type.i64) + self.assertEqual(loaded_model.get_output_partial_shape( + 0), PartialShape([2, 3]))