diff --git a/.github/scripts/collect_github_metrics.py b/.github/scripts/collect_github_metrics.py index d933fa0f927987..275adefe911cda 100644 --- a/.github/scripts/collect_github_metrics.py +++ b/.github/scripts/collect_github_metrics.py @@ -6,6 +6,7 @@ import logging import psycopg2 import dateutil +import argparse def init_logger(): LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper() @@ -13,16 +14,26 @@ def init_logger(): format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', datefmt='%m-%d-%Y %H:%M:%S') +def make_parser(): + parser = argparse.ArgumentParser() + parser.add_argument('-r', '--repository-name', type=str, required=True, + help='Repository name in OWNER/REPOSITORY format') + parser.add_argument('--run-id', type=str, required=True, + help='Workflow Run ID') + + return parser + def create_db_tables(conn, cur): - cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_runs_test( - id SERIAL, - run_id BIGINT PRIMARY KEY, + cur.execute('''CREATE TABLE IF NOT EXISTS workflow_runs( + id SERIAL PRIMARY KEY, + run_id BIGINT, html_url TEXT, name VARCHAR(255), run_started_at TIMESTAMP, + created_at TIMESTAMP, + updated_at TIMESTAMP, triggering_actor_login VARCHAR(255), conclusion VARCHAR(25), - run_number INT, event VARCHAR(50), run_attempt INT, repository_full_name VARCHAR(255), @@ -30,13 +41,14 @@ def create_db_tables(conn, cur): head_branch VARCHAR(255), status VARCHAR(25), display_title TEXT, - path TEXT + path TEXT, + total_duration_seconds INT ); ''') - cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_jobs_test( - id SERIAL, - job_id BIGINT PRIMARY KEY, - parent_run_id BIGINT REFERENCES github_workflow_runs_test(run_id), + cur.execute('''CREATE TABLE IF NOT EXISTS workflow_jobs( + id SERIAL PRIMARY KEY, + job_id BIGINT, + parent_run_id BIGINT, html_url TEXT, name VARCHAR(255), created_at TIMESTAMP, @@ -47,12 +59,14 @@ def create_db_tables(conn, cur): runner_name VARCHAR(255), status VARCHAR(25), conclusion VARCHAR(25), - head_branch VARCHAR(255) + head_branch VARCHAR(255), + run_attempt INT, + workflow_name TEXT ); ''') - cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_steps_test( + cur.execute('''CREATE TABLE IF NOT EXISTS workflow_steps( id SERIAL PRIMARY KEY, - parent_job_id BIGINT REFERENCES github_workflow_jobs_test(job_id), + parent_job_id BIGINT, name VARCHAR(255), conclusion VARCHAR(25), number INT, @@ -65,20 +79,16 @@ def create_db_tables(conn, cur): def main(): init_logger() - + parser = make_parser() + args = parser.parse_args() logger = logging.getLogger(__name__) github_token = os.environ.get('GITHUB_TOKEN') if not github_token: raise ValueError('GITHUB_TOKEN environment variable is not set!') - run_id = os.environ.get('RUN_ID') - if not run_id: - raise ValueError('RUN_ID environment variable is not set!') - - repo_name = os.environ.get('GITHUB_REPOSITORY') - if not repo_name: - raise ValueError('GITHUB_REPOSITORY environment variable is not set!') + run_id = args.run_id + repo_name = args.repository_name # this should be specified in runner's env @@ -102,18 +112,30 @@ def main(): repo = g.get_repo(repo_name) run = repo.get_workflow_run(int(run_id)) - - workflow_data_query = f'''INSERT INTO github_workflow_runs_test( + if run.status != 'completed': + logger.error('Run %s is not completed! Only completed runs should be in the database', run_id) + raise SystemExit(1) + + # We rely on the following assumptions: + # - The workflow run is completed. 
When run.status != 'completed' we should not add it to the database
+ # theoretically the second attempt can be triggered right after the completion of the first one
+ # or while the runner that executes this script is still being deployed
+ #
+ # - Job's queued duration equals "job.started_at - job.created_at" if started_at > created_at.
+ # Otherwise the job should not be added to the database
+ total_duration_seconds = round(run.timing().run_duration_ms / 1000)
+ workflow_data_query = f'''INSERT INTO workflow_runs( run_id, html_url, name, - run_started_at, triggering_actor_login, conclusion, - run_number, event, run_attempt, repository_full_name, - head_branch, display_title, path) + run_started_at, created_at, updated_at, triggering_actor_login, conclusion, + event, run_attempt, repository_full_name, + head_branch, display_title, path, total_duration_seconds) VALUES( '{run_id}', '{run.html_url}', '{run.name}', '{run.run_started_at}', + '{run.created_at}', '{run.updated_at}', '{run.raw_data['triggering_actor']['login']}', - '{run.conclusion}', '{run.run_number}', '{run.event}', + '{run.conclusion}', '{run.event}', '{run.run_attempt}', '{run.raw_data['repository']['full_name']}', - '{run.head_branch}', '{run.display_title}', '{run.path}' + '{run.head_branch}', '{run.display_title}', '{run.path}', '{total_duration_seconds}' ); ''' @@ -126,6 +148,10 @@ def main(): duration_seconds = 0 job_created_at_date = dateutil.parser.parse(job.raw_data['created_at']) + if job_created_at_date > job.started_at: + logger.warning('Skipping job %s of run %s - most likely a stub \ job created after workflow restart', job.name, run_id) + continue queued_duration_timedelta = job.started_at - job_created_at_date queued_duration_seconds = round(queued_duration_timedelta.total_seconds()) @@ -134,17 +160,19 @@ def main(): duration_seconds = round(duration_timedelta.total_seconds()) job_data_query = f''' - INSERT INTO github_workflow_jobs_test( + INSERT INTO workflow_jobs( job_id, parent_run_id, html_url, name, created_at, started_at, completed_at, queued_duration_seconds, duration_seconds, - runner_name, status, conclusion, head_branch) + runner_name, status, conclusion, head_branch, + run_attempt, workflow_name + ) VALUES( '{job_id}', '{run_id}', '{job.html_url}', '{job.name}', '{job.raw_data['created_at']}', '{job.started_at}', '{job.completed_at}', '{queued_duration_seconds}', '{duration_seconds}', '{job.raw_data['runner_name']}', '{job.status}', '{job.conclusion}', - '{job.raw_data['head_branch']}' + '{job.raw_data['head_branch']}', '{job.raw_data['run_attempt']}', '{job.raw_data['workflow_name']}' ); ''' logger.debug('Job query: %s', job_data_query) @@ -154,7 +182,7 @@ def main(): duration_seconds = round(duration_seconds_timedelta.total_seconds()) step_data_query = f''' - INSERT INTO github_workflow_steps_test( + INSERT INTO workflow_steps( parent_job_id, name, conclusion, number, started_at, completed_at, duration_seconds) diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 856f85afa29961..f5a0d86970616b 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -39,9 +39,5 @@ jobs: - name: CMake configure run: cmake -DCMAKE_BUILD_TYPE=Release -DTHREADING=SEQ -B build - - name: Get number of CPU cores - uses: SimenB/github-actions-cpu-cores@v2 - id: cpu-cores - - name: Build snippets - run: cmake --build build --target openvino_docs_snippets --parallel ${{ steps.cpu-cores.outputs.count }} + run: cmake --build build --target 
openvino_docs_snippets --parallel diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 23135222f0214c..73427f6871b75c 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -54,10 +54,6 @@ jobs: python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_tf2.txt python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_dev.txt - - name: Get number of CPU cores - uses: SimenB/github-actions-cpu-cores@v2 - id: cpu-cores - - name: Build OpenVINO with CMake uses: ashutoshvarma/action-cmake-build@master with: @@ -81,7 +77,6 @@ jobs: -DCMAKE_CXX_LINKER_LAUNCHER=ccache -DENABLE_SYSTEM_SNAPPY=ON build-type: Release - parallel: ${{ steps.cpu-cores.outputs.count }} - name: Install wheel packages run: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install_pkg -P '${{ github.workspace }}/build/cmake_install.cmake' @@ -129,7 +124,6 @@ jobs: -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_LINKER_LAUNCHER=ccache -DCMAKE_CXX_LINKER_LAUNCHER=ccache - parallel: ${{ steps.cpu-cores.outputs.count }} - name: Print info diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index c89370851ad24c..51f9852fba0858 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -114,7 +114,12 @@ jobs: - name: TensorFlow Models Tests - TF FE run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/tensorflow/ -m ${{ inputs.model_scope }} --html=${INSTALL_TEST_DIR}/TEST-tf_fe_models_${{ inputs.model_scope }}.html --self-contained-html -v + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/tensorflow/test_tf_convert_model.py -m ${{ inputs.model_scope }} \ + --html=${INSTALL_TEST_DIR}/TEST-tf_fe_models_${{ inputs.model_scope }}.html --self-contained-html -v + # decouple notebook tests due to GitHub issue in tensorflow_hub https://github.com/tensorflow/hub/issues/903 + # and use WA to switch to (legacy) Keras 2 + TF_USE_LEGACY_KERAS=1 python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/tensorflow/test_tf_hub_api_notebooks.py -m ${{ inputs.model_scope }} \ + --html=${INSTALL_TEST_DIR}/TEST-tf_fe_models_notebooks_${{ inputs.model_scope }}.html --self-contained-html -v env: TEST_DEVICE: CPU diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml index 59146bda517182..fe8a3778c34f73 100644 --- a/.github/workflows/send_workflows_to_opentelemetry.yml +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -49,6 +49,8 @@ jobs: - name: Install deps run: | pip3 install -r .github/scripts/requirements.txt + # dependency review action has these as an exception + # yet it still complains, so install them here pip3 install PyGithub==2.2.0 psycopg2-binary==2.9.9 - name: Send metrics to SQL database @@ -58,6 +60,9 @@ jobs: PGHOST: ${{ secrets.METRICS_DATABASE_HOST }} PGUSER: ${{ secrets.METRICS_DATABASE_USERNAME }} PGPASSWORD: ${{ secrets.METRICS_DATABASE_PASSWORD }} + PGDATABASE: ${{ secrets.METRICS_DATABASE_NAME }} PGPORT: 5432 run: | - python3 .github/scripts/collect_github_metrics.py + python3 .github/scripts/collect_github_metrics.py \ + --run-id ${{ github.event.workflow_run.id }} \ + --repository-name ${GITHUB_REPOSITORY} diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst 
b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst index 1cea2c755f1505..f5b6f025673e71 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/available-opsets/opset14.rst @@ -49,6 +49,7 @@ Table of Contents * :doc:`Constant <../operation-specs/infrastructure/constant-1>` * :doc:`Convert <../operation-specs/type/convert-1>` * :doc:`ConvertLike <../operation-specs/type/convert-like-1>` +* :doc:`ConvertPromoteTypes <../operation-specs/type/convert-promote-types-14>` * :doc:`Convolution <../operation-specs/convolution/convolution-1>` * :doc:`ConvolutionBackpropData <../operation-specs/convolution/convolution-backprop-data-1>` * :doc:`Cos <../operation-specs/arithmetic/cos-1>` @@ -205,4 +206,3 @@ Table of Contents * :doc:`Unique <../operation-specs/movement/unique-10>` * :doc:`Unsqueeze <../operation-specs/shape/unsqueeze-1>` * :doc:`VariadicSplit <../operation-specs/movement/variadic-split-1>` - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 07242d20b85327..250ef955bb41a8 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -42,6 +42,7 @@ Operation Specifications Concat-1 Constant-1 ConvertLike-1 + ConvertPromoteTypes-14 Convert-1 ConvolutionBackpropData-1 Convolution-1 @@ -229,4 +230,3 @@ Operation Specifications Unique-10 Unsqueeze-1 VariadicSplit-1 - diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/strided-slice-1.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/strided-slice-1.rst index a4025de9a9f924..242e5a58160c75 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/strided-slice-1.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/movement/strided-slice-1.rst @@ -14,6 +14,73 @@ StridedSlice **Short description**: *StridedSlice* extracts a strided slice of a tensor. +**Detailed description**: The *StridedSlice* operation extracts a slice from a given tensor based on computed indices from the inputs: begin (inclusive of the element at the given index), end (exclusive of the element at the given index), and stride, for each dimension. + +The operation takes inputs with the following properties: + +* :math:`input` tensor to slice, with N dimensions. +* :math:`begin, end, stride` - 1D lists of integers of the same length M. **Stride input cannot contain any zeros.** +* :math:`begin\_mask, end\_mask, new\_axis\_mask, shrink\_axis\_mask, ellipsis\_mask` - bitmasks, 1D lists of integers (0 or 1). :math:`ellipsis\_mask` can have up to one occurrence of the value 1. **Each mask can have a unique length. The length of the masks can differ from the rank of the input shape.** +* :math:`new\_axis\_mask, shrink\_axis\_mask, ellipsis\_mask` modify the output dimensionality of the data. If they are unused, :math:`N == M`. Otherwise, N does not necessarily equal M. + +.. 
note:: Negative Values in Begin and End (Negative Values Adjusting)
+
+ Negative values present in :math:`begin` or :math:`end` represent indices starting from the back, i.e., the value of -1 represents the last element of the input dimension. In practice, negative values are automatically incremented by the size of the dimension. For example, if :math:`data = [0, 1, 2, 3]`, :math:`size(data) = 4`, :math:`begin(i) = -1` for some i, this value will be modified to be :math:`begin(i) = -1 + 4 = 3`. Note that if :math:`begin(i) = -5` for some i, this value will be adjusted as follows: :math:`begin(i) = -5 + 4 = -1`, which will trigger value clamping.
+
+The basic slicing operation accumulates output elements as follows:
+
+* The operation iterates over the values of begin, end, and stride. At every step, the operation uses the i-th element of begin, end, and stride to perform the slicing at the corresponding dimension.
+* Let :math:`slicing\_index = begin[i]`. This value determines the first index to start slicing. This sliced element is added to the output.
+* If :math:`begin[i] == end[i]`, only a single element from the corresponding dimension is added to the output. The corresponding output dimension is then equal to 1 (in other words, the dimension is kept).
+* At each step, the :math:`slicing\_index` is incremented by the value of :math:`stride[i]`. As long as the :math:`slicing\_index < end[i]`, the element corresponding to the :math:`slicing\_index` is added to the output.
+* Whenever :math:`slicing\_index >= end[i]`, the slicing stops, and the corresponding element is not added to the output.
+
+Notice that the basic slicing operation assumes :math:`N == M` (that is, the i-th slicing step corresponds to the i-th dimension), as no masks are used.
+
+For the purposes of this specification, assume that :math:`dim` is the dimension corresponding to the i-th slicing step.
+
+.. note:: Indexing in Reverse (Slicing in Reverse)
+
+ If :math:`stride[i] < 0`, the indexing will happen in reverse. At each step, the absolute value of :math:`stride[i]` will be subtracted from the :math:`slicing\_index`. As long as the :math:`slicing\_index > end[i]`, the corresponding element is added to the output. Whenever :math:`slicing\_index <= end[i]`, the slicing stops.
+
+.. note:: Value Out-of-Bounds (Silent Clamping)
+
+ If a value in begin or end is out of bounds for the corresponding dimension, it is silently clamped. In other words:
+
+ * If :math:`begin[i] >= size(dim)`, then :math:`begin[i] = size(dim)`. If :math:`begin[i] < 0` (after Negative Values Adjusting), then :math:`begin[i] = 0`.
+ * If :math:`end[i] >= size(dim)`, then :math:`end[i] = size(dim)`. If :math:`end[i] < 0` (after Negative Values Adjusting), then :math:`end[i] = 0`.
+
+ If slicing in reverse, the clamping behavior changes to the following:
+
+ * If :math:`begin[i] >= size(dim)`, then :math:`begin[i] = size(dim) - 1`. If :math:`begin[i] < 0` (after Negative Values Adjusting), then :math:`begin[i] = 0`.
+ * If :math:`end[i] >= size(dim)`, then :math:`end[i] = size(dim)`. If :math:`end[i] < 0` (after Negative Values Adjusting), then :math:`end[i] = -1`.
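+Taken together, the adjusting and clamping rules can be condensed into a short Python sketch (an illustrative model of the rules above, not the OpenVINO implementation):
+
+.. code-block:: py
+
+   def adjust_and_clamp(idx, dim_size, is_end, reverse=False):
+       # Negative Values Adjusting: indices count from the back of the dimension
+       if idx < 0:
+           idx += dim_size
+       # Silent Clamping, forward slicing: clamp begin/end into [0, size(dim)]
+       if not reverse:
+           return min(max(idx, 0), dim_size)
+       # Silent Clamping, reverse slicing, per the rules above
+       if is_end:
+           return dim_size if idx >= dim_size else max(idx, -1)
+       return dim_size - 1 if idx >= dim_size else max(idx, 0)
+
+   # begin = -5 on a dimension of size 4: adjusted to -1, then clamped to 0
+   assert adjust_and_clamp(-5, 4, is_end=False) == 0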
+The operation accepts multiple bitmasks in the form of integer arrays to modify the above behavior. **If the length of the bitmask is less than the length of the corresponding input, it is assumed that the bitmask is extended (padded at the end) with zeros. If the length of the bitmask is greater than necessary, the remaining values are ignored.**
+
+For examples of usage of each mask, please refer to the examples provided at the end of the document.
+
+During the i-th slicing step:
+
+* If the :math:`begin\_mask[i]` is set to one, the value of :math:`begin[i]` is set to :math:`0` (:math:`size(dim) - 1` if slicing in reverse). Equivalent of swapping the left-hand side of the Python slicing operation :math:`array[0:10]` with :math:`array[:10]` (slice from the start).
+* If the :math:`end\_mask[i]` is set to one, the value of :math:`end[i]` is set to :math:`size(dim)` (:math:`0` if slicing in reverse - note that this does not allow slicing inclusively with the first value). Equivalent of swapping the right-hand side of the Python slicing operation :math:`array[0:10]` (assume :math:`len(array) = 10`) with :math:`array[0:]` (slice till the end, inclusive).
+* If the :math:`new\_axis\_mask[i]` is set to one, the values of :math:`begin[i]`, :math:`end[i]`, and :math:`stride[i]` **ARE IGNORED**, and a new dimension with size 1 appears in the output. No slicing occurs at this step. Equivalent of inserting a new dimension into a matrix using NumPy :math:`array[..., np.newaxis, ...]`: :math:`shape(array) = [..., 1, ...]`.
+* If the :math:`shrink\_axis\_mask[i]` is set to one, the value of :math:`begin[i]` **MUST EQUAL** :math:`end[i]` (Note that this would normally result in a size 1 dimension), and the :math:`stride[i]` value **IS IGNORED**. The corresponding dimension is removed, with only a single element from that dimension remaining. Equivalent of selecting only a given element without preserving the dimension (NumPy equivalent of keepdims=False) :math:`array[..., 0, ...] -> array[..., ...]` (one less dimension).
+* If the :math:`ellipsis\_mask[i]` is set to one, the values of :math:`begin[i], end[i],` and :math:`stride[i]` **ARE IGNORED**, and a number of dimensions is skipped. The exact number of dimensions skipped in the original input is :math:`rank(input) - (M - sum(new\_axis\_mask) - 1)`. The corresponding dimension is treated as an ellipsis ('...'); in other words, it is treated as multiple, sequential dimensions that are unaffected by slicing and match the rest of the slicing operation. This allows for a concise and flexible way to perform slicing operations, effectively condensing the slicing parameters for dimensions marked with ellipsis into a single slice notation. For example, given a 10D input, and tasked to select the first element from the 1st and last dimension, normally one would have to write :math:`[0, :, :, :, :, :, :, :, :, 0]`, but with ellipsis, it is only necessary to write :math:`[0, ..., 0]`. Equivalent of using the '...' (ellipsis) operation in NumPy: :math:`array[0, ..., 0]` (with :math:`rank(array) = 10`) is the same as writing :math:`array[0, :, :, :, :, :, :, :, :, 0]`.
+
+.. note:: The i-th Slicing Step and Dimension Modification
+
+ The i-th slicing step does not necessarily correspond to the i-th dimension modification. Let i be the index of the slicing step and j be the index of the corresponding processed dimension. For trivial cases:
+
+ * Every time none of the masks is set (all are 0), j is incremented by one.
+ * Every time :math:`begin\_mask[i]` or :math:`end\_mask[i]` is set to one, j is incremented by one.
+ * Every time :math:`shrink\_axis\_mask[i]` is set to one, j is incremented by one.
+
+ However:
+
+ * Every time :math:`new\_axis\_mask[i]` is set to one, j is not incremented.
+ * When the value of one occurs at :math:`ellipsis\_mask[i]`, j is incremented by :math:`rank(input) - (M - sum(new\_axis\_mask) - 1)`.
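+The NumPy equivalences referenced in the mask descriptions above can be verified directly; a small illustrative snippet:
+
+.. code-block:: py
+
+   import numpy as np
+
+   a = np.arange(2 * 3 * 4).reshape(2, 3, 4)
+
+   # begin_mask / end_mask: slice from the start / to the end of the dimension
+   assert np.array_equal(a[0:2], a[:])
+   # new_axis_mask: insert a new dimension of size 1, no slicing at this step
+   assert a[np.newaxis].shape == (1, 2, 3, 4)
+   # shrink_axis_mask: select one element and drop the dimension (keepdims=False)
+   assert a[:, 0].shape == (2, 4)
+   # ellipsis_mask: condense the untouched middle dimensions into '...'
+   assert np.array_equal(a[0, ..., 0], a[0, :, 0])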
+ **Attributes** * *begin_mask* @@ -58,15 +125,14 @@ StridedSlice **Inputs**: -* **1**: ``data`` - input tensor to be sliced of type *T* and arbitrary shape. **Required.** - -* **2**: ``begin`` - 1D tensor of type *T_IND* with begin indexes for input tensor slicing. **Required.** - Out-of-bounds values are silently clamped. If ``begin_mask[i]`` is ``1`` , the value of ``begin[i]`` is ignored and the range of the appropriate dimension starts from ``0``. Negative values mean indexing starts from the end. For example, if ``data=[1,2,3]``, ``begin[0]=-1`` means ``begin[0]=3``. +* **1**: ``data`` - input tensor to be sliced of type *T* and arbitrary shape. **Required.** +* **2**: ``begin`` - 1D tensor of type *T_IND* with begin indexes for input tensor slicing. Out-of-bounds values are silently clamped. If ``begin_mask[i]`` is ``1``, the value of ``begin[i]`` is ignored and the range of the appropriate dimension starts from ``0``. Negative values mean indexing starts from the end. **Required.** +* **3**: ``end`` - 1D tensor of type *T_IND* with end indexes for input tensor slicing. Out-of-bounds values will be silently clamped. If ``end_mask[i]`` is ``1``, the value of ``end[i]`` is ignored and the full range of the appropriate dimension is used instead. Negative values mean indexing starts from the end. **Required.** +* **4**: ``stride`` - 1D tensor of type *T_IND* with strides. If not provided, stride is assumed to be equal to 1. **Optional.** -* **3**: ``end`` - 1D tensor of type *T_IND* with end indexes for input tensor slicing. **Required.** - Out-of-bounds values will be silently clamped. If ``end_mask[i]`` is ``1``, the value of ``end[i]`` is ignored and the full range of the appropriate dimension is used instead. Negative values mean indexing starts from the end. For example, if ``data=[1,2,3]``, ``end[0]=-1`` means ``end[0]=3``. +**Outputs**: -* **4**: ``stride`` - 1D tensor of type *T_IND* with strides. **Optional.** +* **1**: A tensor of type *T* with values selected by the slicing operation according to the rules specified above. **Types** * *T_IND*: any supported integer type. **Example** -Example of ``begin_mask`` & ``end_mask`` usage. + +Basic example with different strides, standard slicing and in reverse. Equivalent of performing :math:`array[0:4, 1:4, 0:4:2, 1:4:2, 3:0:-1, 3:0:-2]` on a 6D array. .. code-block:: xml :force: - + - 2 - 3 + 4 + 4 + 4 + 4 + 4 4 - 2 + 6 - 2 + 6 - 2 + 6 - 1 - 3 - 2 + 4 + 3 + 2 + 2 + 4 + 2 +Example of clamping in standard and reverse slicing. Equivalent of performing :math:`array[2:3, 2:1:-1]` on a 2D array. -Example of ``new_axis_mask`` usage. +.. code-block:: xml + :force: + + + + + + 2 + 2 + + + 2 + + + 2 + + + 2 + + + + + 1 + 1 + + + + +Example of negative slicing. Equivalent of performing :math:`array[0:2, 0:2, 0:-1]` on a 3D array. .. code-block:: xml :force: - + 2 @@ -121,58 +223,218 @@ Example of ``new_axis_mask`` usage. 4 - 2 + 3 - 2 + 3 - 2 + 3 - 1 + 2 + 2 + 3 + + + + +Example of ``begin_mask`` & ``end_mask`` usage. Equivalent of performing :math:`array[1:, :, ::-1]` on a 3D array. + +.. code-block:: xml + :force: + + + + + 2 3 4 + + 3 + + + 3 + + + 3 + + + + + 1 + 3 + 3 + -Example of ``shrink_axis_mask`` usage. +Example of ``new_axis_mask`` usage. Equivalent of performing :math:`array[np.newaxis, 0:2, np.newaxis, 0:4]` on a 2D array. .. 
code-block:: xml :force: - + - 1 2 + 4 + + + 4 + + + 4 + + + 4 + + + + + 1 + 2 + 1 + 4 + + + + +Example of ``shrink_axis_mask`` usage. Equivalent of performing :math:`array[0:1, 0, 0:384, 0:640, 0:8]` on a 5D array. + +.. code-block:: xml + :force: + + + + + + 1 + 2 384 640 8 - 5 + 5 - 5 + 5 - 5 + 5 - 1 - 384 + 1 + 384 640 8 +Example of ``ellipsis_mask`` usage. Equivalent of performing :math:`array[0:4, ..., 0:5]` on a 10D array. + +.. code-block:: xml + :force: + + + + + + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + + + 3 + + + 3 + + + 3 + + + + + 4 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 5 + + + + +Example of ``ellipsis_mask`` usage with other masks of unequal length. Equivalent of performing :math:`array[2:, ..., np.newaxis, :10]` on a 10D array. + +.. code-block:: xml + :force: + + + + + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + + + 3 + + + 3 + + + 3 + + + + + 8 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 10 + 1 + 5 + + + diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/convert-promote-types-14.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/convert-promote-types-14.rst new file mode 100644 index 00000000000000..e9b8743193cb19 --- /dev/null +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/type/convert-promote-types-14.rst @@ -0,0 +1,186 @@
+.. {#openvino_docs_ops_type_ConvertPromoteTypes_14}
+
+ConvertPromoteTypes
+===================
+
+
+.. meta::
+ :description: Learn about ConvertPromoteTypes-14 - a type conversion that promotes a pair of input tensors to a common datatype.
+
+**Versioned name**: *ConvertPromoteTypes-14*
+
+**Category**: *Type conversion*
+
+**Short description**: *ConvertPromoteTypes* operation performs promotion and conversion of ``input_0`` and ``input_1`` to a common datatype based on promotion rules.
+
+**Detailed description**
+Operation performs datatype promotion for a pair of inputs, returning a pair of outputs that represent the input tensors converted to a common type.
+
+Promotion rules were designed to follow the behavior of PyTorch and TensorFlow with experimental NumPy behavior enabled.
+
+If inputs have different types of data (for example, ``floating-point`` and ``integer``), the resulting datatype is taken from the input with higher type priority,
+where ``floating-point`` types have higher priority than ``integer``, and ``integer`` have higher priority than ``boolean``.
+
+ .. note::
+ If *promote_unsafe* is set to ``false``, a conversion from ``integer`` to ``floating-point`` will fail if the ``floating-point`` bit-width is less than double that of the ``integer``. This mitigates possible loss of precision or undefined behavior caused by differences in the maximum/minimum values supported by the given data types.
+
+If both inputs have the same type of data (for example, both are ``integer`` with any bit-width and sign), the resulting datatype is chosen to be of the same type of data, with bit-width
+and sign able to hold all possible values supported by the input data types, except when used with *pytorch_scalar_promotion*.
+
+* In the case where *pytorch_scalar_promotion* is set to ``true``, one of the inputs is a scalar tensor (rank 0), and the second input is a dimensioned tensor (any rank other than 0), the datatype of the dimensioned tensor is selected as the resulting common type. This may result in undefined behavior if the type of the scalar input supports a greater range of values than the tensor input. 
+
+* In case of ``floating-point`` types, the resulting type is the type with the lowest bit-width of mantissa and exponent that fits the mantissa and exponent of the input types. This may result in unexpected bit widening in conversions like ``(bf16, f16) -> f32``. Conversion of ``(f8e4m3, f8e5m2) -> f16`` is a special case where the conversion result was manually set to ``f16``; based on mantissa and exponent alone, the rules would allow promotion to either bf16 or f16.
+
+* In case of ``integer`` types, the resulting type is an ``integer``, signed when any of the inputs is signed, and with the minimal bit-width to hold all possible values supported by the input data types. In case of promotion of signed and unsigned ``integers``, the resulting datatype would be a signed ``integer`` with a bit-width of at least double that of the unsigned input, to be able to store the possible maximum and minimum values of the unsigned one. An exception is the promotion of u64 with any signed ``integer``: since it would result in ``i128``, which is not supported by OpenVINO, the outcome of this promotion can be set by the *u64_integer_promotion_target* attribute, by default set to ``f32``.
+
+ .. note::
+ If *promote_unsafe* is set to ``false``, exceptions will be raised for promotions that introduce bit widening, promotions of u64 with any signed ``integer``, and promotions causing conversion to a type with a lower range of values.
+
+.. note::
+ Promotion rules do not depend on the order of inputs or the values contained within tensors. The shape of tensors may affect the type only when *pytorch_scalar_promotion* is set to ``true`` and both inputs have the same type priority.
+
+Examples (notation: ``ConvertPromoteTypes(lhs_type, rhs_type) -> promoted_common_type``):
+
+* Regular promotions with attributes set to default values:
+
+ * ``ConvertPromoteTypes(i8, f32) -> f32`` - floating has higher priority than integer, bit-width of 32 is more than double of 8, minimizing impact of precision loss.
+ * ``ConvertPromoteTypes(i32, u8) -> i32`` - both types of the same priority, signed integer has enough bit-width to represent all data of the unsigned one.
+
+* Promotions that will cause exceptions when *promote_unsafe* is set to ``false``:
+
+ * ``ConvertPromoteTypes(f16, i64) -> f16`` - Floating-point type has higher priority, however, i64 can support values outside of the range of f16, possibly resulting in undefined behaviors in conversion.
+ * ``ConvertPromoteTypes(f64, u64) -> f64`` - While f64 supports much bigger max values than u64, precision loss might be significant.
+ * ``ConvertPromoteTypes(i8, u8) -> i16`` - Both inputs have integer data type, however, to support ranges from both inputs, bit-widening was necessary.
+ * ``ConvertPromoteTypes(f16, bf16) -> f32`` - Both inputs have the same data type, however, due to the difference in mantissa and exponent, bit-widening to f32 is necessary to represent the whole range and precision. This is in accordance with IEEE 754.
+ * ``ConvertPromoteTypes(f8e4m3, f8e5m2) -> f16`` - Both inputs have f8 data type, however, due to the difference in mantissa and exponent, bit widening to either f16 or bf16 is necessary, where f16 was selected as the result of this promotion.
+ * ``ConvertPromoteTypes(u64, i8) -> f32`` - promotion of u64 and any signed integer would result in i128, which is not supported. Common type is set according to *u64_integer_promotion_target*, default f32.
+
+* Promotions for PyTorch-like mode with *pytorch_scalar_promotion* set to ``true``. Notation is extended by ``S(type)`` marking a 0-dimensioned (scalar) tensor, and ``D(type)`` marking a dimensioned tensor.
+
+ * ``ConvertPromoteTypes(S(i64), D(u8)) -> u8`` - Inputs have the same type priority (both integers), promote to the type of the dimensioned input. Rules of safe promotion (controlled by *promote_unsafe*) apply in PyTorch-like conversion.
+ * ``ConvertPromoteTypes(S(f16), D(i8)) -> f16`` - Inputs have mixed data types - follow general conversion rules; the dimensions of inputs do not affect the common type.
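+The ``integer`` branch of these rules can be sketched in a few lines of Python (an illustrative model of the promotion table above, not the OpenVINO implementation; type names are shorthand):
+
+.. code-block:: py
+
+   SIGNED = {"i8": 8, "i16": 16, "i32": 32, "i64": 64}
+   UNSIGNED = {"u8": 8, "u16": 16, "u32": 32, "u64": 64}
+
+   def promote_integers(a, b, u64_target="f32"):
+       # u64 with any signed integer would need i128 -> configurable target type
+       if "u64" in (a, b) and (a in SIGNED or b in SIGNED):
+           return u64_target
+       bits = lambda t: SIGNED.get(t) or UNSIGNED[t]
+       # same signedness: the wider type already holds all values of both
+       if (a in SIGNED) == (b in SIGNED):
+           return a if bits(a) >= bits(b) else b
+       # mixed signedness: signed result at least twice the unsigned bit-width
+       s, u = (a, b) if a in SIGNED else (b, a)
+       need = max(bits(s), 2 * bits(u))
+       return next(t for t, w in SIGNED.items() if w >= need)
+
+   assert promote_integers("i32", "u8") == "i32"
+   assert promote_integers("i8", "u8") == "i16"   # bit-widening, unsafe
+   assert promote_integers("u64", "i8") == "f32"  # u64_integer_promotion_target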
+
+**Attributes**:
+
+* *promote_unsafe*
+
+ * **Description**: allow for promotions that might result in bit-widening, significant precision loss and undefined behaviors. When false, exceptions will be raised.
+ * **Range of values**: true or false
+ * **Type**: ``bool``
+ * **Default value**: false
+ * **Required**: *no*
+
+* *pytorch_scalar_promotion*
+
+ * **Description**: if true, when a scalar and a dimensioned tensor with the same type priority (both either floating-point or integers) are provided as inputs, align the datatype to the dimensioned one.
+ * **Range of values**: true or false
+ * **Type**: ``bool``
+ * **Default value**: false
+ * **Required**: *no*
+
+* *u64_integer_promotion_target*
+
+ * **Description**: promotion target for promotion of u64 and any signed integer inputs.
+ * **Range of values**: any element type supported by Convert operator.
+ * **Type**: ``element::Type``
+ * **Default value**: element::f32
+ * **Required**: *no*
+
+**Inputs**
+
+* **1**: ``input_0`` - A tensor of type *T1* and arbitrary shape. **Required.**
+* **2**: ``input_1`` - A tensor of type *T2* and arbitrary shape. **Required.**
+
+**Outputs**
+
+* **1**: The result of *ConvertPromoteTypes* operation applied to input tensor ``input_0``. A tensor of type *T_OUT* and the same shape as ``input_0`` input tensor.
+* **2**: The result of *ConvertPromoteTypes* operation applied to input tensor ``input_1``. A tensor of type *T_OUT* and the same shape as ``input_1`` input tensor.
+
+**Types**
+
+* *T1*: any supported type.
+* *T2*: any supported type.
+* *T_OUT*: Result of type promotion for the given inputs.
+
+**Example 1: Promote floats**
+
+.. code-block:: xml
+ :force:
+
+ + + + + 256 + 56 + + + 3 + + + + + 256 + 56 + + + 3 + + +
+
+**Example 2: Promote integers unsafe**
+
+.. code-block:: xml
+ :force:
+
+ + + + + 256 + 56 + + + 3 + + + + + 256 + 56 + + + 3 + + +
+
+**Example 3: Promote u64 and signed integer unsafe**
+
+.. code-block:: xml
+ :force:
+
+ + + + + 256 + 56 + + + 3 + + + + <!-- type provided by u64_integer_promotion_target --> + 256 + 56 + + <!-- type provided by u64_integer_promotion_target --> + 3 + + + diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference.rst index 55555ac83a37de..c2effe2c140f13 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference.rst @@ -23,44 +23,43 @@ Optimize Inference optimizations that can be done independently. Inference speed depends on latency and throughput. - -Runtime optimization, or deployment optimization, focuses on tuning inference parameters and execution means (e.g., the optimum number of requests executed simultaneously). Unlike model-level optimizations, they are highly specific to the hardware and case they are used for, and often come at a cost. 
-``ov::hint::inference_precision`` is a "typical runtime configuration" which trades accuracy for performance, allowing ``fp16/bf16`` execution for the layers that remain in ``fp32`` after quantization of the original ``fp32`` model. - -Therefore, optimization should start with defining the use case. For example, if it is about processing millions of samples by overnight jobs in data centers, throughput could be prioritized over latency. On the other hand, real-time usages would likely trade off throughput to deliver the results at minimal latency. A combined scenario is also possible, targeting the highest possible throughput, while maintaining a specific latency threshold. - -It is also important to understand how the full-stack application would use the inference component "end-to-end." For example, to know what stages need to be orchestrated to save workload devoted to fetching and preparing input data. - -For more information on this topic, see the following articles: - -* :doc:`Supported Devices <../../about-openvino/compatibility-and-support/supported-devices>` -* :doc:`Inference Devices and Modes ` -* :ref:`Inputs Pre-processing with the OpenVINO ` -* :ref:`Async API ` -* :ref:`The 'get_tensor' Idiom ` -* For variably-sized inputs, consider :doc:`dynamic shapes ` - - -See the :doc:`latency ` and :doc:`throughput ` optimization guides, for **use-case-specific optimizations** - -Writing Performance-Portable Inference Applications -################################################### - -Although inference performed in OpenVINO Runtime can be configured with a multitude of low-level performance settings, it is not recommended in most cases. Firstly, achieving the best performance with such adjustments requires deep understanding of device architecture and the inference engine. - - -Secondly, such optimization may not translate well to other device-model combinations. In other words, one set of execution parameters is likely to result in different performance when used under different conditions. For example: - -* both the CPU and GPU support the notion of :doc:`streams <./optimize-inference/optimizing-throughput/advanced_throughput_options>`, yet they deduce their optimal number very differently. -* Even among devices of the same type, different execution configurations can be considered optimal, as in the case of instruction sets or the number of cores for the CPU and the batch size for the GPU. -* Different models have different optimal parameter configurations, considering factors such as compute vs memory-bandwidth, inference precision, and possible model quantization. -* Execution "scheduling" impacts performance strongly and is highly device-specific, for example, GPU-oriented optimizations like batching, combining multiple inputs to achieve the optimal throughput, :doc:`do not always map well to the CPU `. - - -To make the configuration process much easier and its performance optimization more portable, the option of :doc:`Performance Hints ` has been introduced. It comprises two high-level "presets" focused on either **latency** or **throughput** and, essentially, makes execution specifics irrelevant. - -The Performance Hints functionality makes configuration transparent to the application, for example, anticipates the need for explicit (application-side) batching or streams, and facilitates parallel processing of separate infer requests for different input sources - +Runtime, or deployment, optimization focuses on tuning inference and execution parameters. 
Unlike +model-level optimization, it is highly specific to the hardware you use and the goal you want +to achieve. You need to plan whether to prioritize accuracy or performance, +:doc:`throughput ` or :doc:`latency `, +or aim at the golden mean. You should also predict how scalable your application needs to be +and how exactly it is going to work with the inference component. This way, you will be able +to achieve the best results for your product.
+
+.. note::
+
+ For more information on this topic, see the following articles:
+
+ * :doc:`Inference Devices and Modes `
+ * :ref:`Inputs Pre-processing with the OpenVINO `
+ * :ref:`Async API `
+ * :ref:`The 'get_tensor' Idiom `
+ * For variably-sized inputs, consider :doc:`dynamic shapes `
+
+Performance-Portable Inference
+################################
+
+To make configuration easier and performance optimization more portable, OpenVINO offers the
+:doc:`Performance Hints ` feature. It comprises
+two high-level “presets” focused on latency **(default)** or throughput.
+
+Although inference with OpenVINO Runtime can be configured with a multitude
+of low-level performance settings, it is not recommended, as:
+
+* It requires deep understanding of device architecture and the inference engine.
+* It may not translate well to other device-model combinations. For example:
+
+ * CPU and GPU deduce their optimal number of streams differently.
+ * Different devices of the same type favor different execution configurations.
+ * Different models favor different parameter configurations (e.g., compute vs memory-bandwidth,
+ inference precision, and possible model quantization).
+ * Execution “scheduling” impacts performance strongly and is highly device-specific. GPU-oriented
+ optimizations :doc:`do not always map well to the CPU `. Additional Resources #################### diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst index 5a678d6b437203..958e585e5de76f 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/high-level-performance-hints.rst @@ -21,9 +21,9 @@ The hints, in contrast, respect the actual model, so the parameters for optimal Performance Hints: Latency and Throughput ######################################### -As discussed in the :doc:`Optimization Guide <../optimize-inference>` there are a few different metrics associated with inference speed. Throughput and latency are some of the most widely used metrics that measure the overall performance of an application. +As discussed in the :doc:`Optimization Guide <../optimize-inference>` there are a few different metrics associated with inference speed. Latency and throughput are some of the most widely used metrics that measure the overall performance of an application. -Therefore, in order to ease the configuration of the device, OpenVINO offers two dedicated hints, namely ``ov::hint::PerformanceMode::THROUGHPUT`` and ``ov::hint::PerformanceMode::LATENCY``. +Therefore, in order to ease the configuration of the device, OpenVINO offers two dedicated hints, namely ``ov::hint::PerformanceMode::LATENCY`` **(default)** and ``ov::hint::PerformanceMode::THROUGHPUT``. 
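+With the Python API, a hint is passed as a property at compile time; a minimal sketch (the model path and device are placeholders):
+
+.. code-block:: py
+
+   import openvino as ov
+   import openvino.properties.hint as hints
+
+   core = ov.Core()
+   model = core.read_model("model.xml")  # placeholder path
+
+   # LATENCY is the default; request THROUGHPUT when processing many inputs at once
+   compiled_model = core.compile_model(
+       model, "CPU", {hints.performance_mode: hints.PerformanceMode.THROUGHPUT})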
For more information on conducting performance measurements with the ``benchmark_app``, refer to the last section in this document. diff --git a/src/bindings/js/node/include/compiled_model.hpp b/src/bindings/js/node/include/compiled_model.hpp index f9c1927a3a90fb..4e3834a3a527bd 100644 --- a/src/bindings/js/node/include/compiled_model.hpp +++ b/src/bindings/js/node/include/compiled_model.hpp @@ -72,5 +72,11 @@ class CompiledModelWrap : public Napi::ObjectWrap { Napi::Value export_model(const Napi::CallbackInfo& info); private: + /** @brief Gets node of a compiled model specified in CallbackInfo. */ + Napi::Value get_node(const Napi::CallbackInfo& info, + const ov::Output& (ov::CompiledModel::*func)() const, + const ov::Output& (ov::CompiledModel::*func_tname)(const std::string&)const, + const ov::Output& (ov::CompiledModel::*func_idx)(size_t) const); + ov::CompiledModel _compiled_model; }; diff --git a/src/bindings/js/node/src/compiled_model.cpp b/src/bindings/js/node/src/compiled_model.cpp index 304ac9d299a3f2..ad77bbd39202f6 100644 --- a/src/bindings/js/node/src/compiled_model.cpp +++ b/src/bindings/js/node/src/compiled_model.cpp @@ -45,25 +45,17 @@ Napi::Value CompiledModelWrap::create_infer_request(const Napi::CallbackInfo& in } Napi::Value CompiledModelWrap::get_output(const Napi::CallbackInfo& info) { - if (info.Length() == 0) { - try { - return Output::wrap(info.Env(), _compiled_model.output()); - } catch (std::exception& e) { - reportError(info.Env(), e.what()); - return Napi::Value(); - } - } else if (info.Length() != 1) { - reportError(info.Env(), "Invalid number of arguments -> " + std::to_string(info.Length())); - return Napi::Value(); - } else if (info[0].IsString()) { - auto tensor_name = info[0].ToString(); - return Output::wrap(info.Env(), _compiled_model.output(tensor_name)); - } else if (info[0].IsNumber()) { - auto idx = info[0].As().Int32Value(); - return Output::wrap(info.Env(), _compiled_model.output(idx)); - } else { - reportError(info.Env(), "Error while getting compiled model outputs."); - return Napi::Value(); + try { + return get_node( + info, + static_cast& (ov::CompiledModel::*)() const>(&ov::CompiledModel::output), + static_cast& (ov::CompiledModel::*)(const std::string&)const>( + &ov::CompiledModel::output), + static_cast& (ov::CompiledModel::*)(size_t) const>( + &ov::CompiledModel::output)); + } catch (std::exception& e) { + reportError(info.Env(), e.what() + std::string("outputs.")); + return info.Env().Null(); } } @@ -79,25 +71,17 @@ Napi::Value CompiledModelWrap::get_outputs(const Napi::CallbackInfo& info) { } Napi::Value CompiledModelWrap::get_input(const Napi::CallbackInfo& info) { - if (info.Length() == 0) { - try { - return Output::wrap(info.Env(), _compiled_model.input()); - } catch (std::exception& e) { - reportError(info.Env(), e.what()); - return Napi::Value(); - } - } else if (info.Length() != 1) { - reportError(info.Env(), "Invalid number of arguments -> " + std::to_string(info.Length())); - return Napi::Value(); - } else if (info[0].IsString()) { - auto tensor_name = info[0].ToString(); - return Output::wrap(info.Env(), _compiled_model.input(tensor_name)); - } else if (info[0].IsNumber()) { - auto idx = info[0].As().Int32Value(); - return Output::wrap(info.Env(), _compiled_model.input(idx)); - } else { - reportError(info.Env(), "Error while getting compiled model inputs."); - return Napi::Value(); + try { + return get_node( + info, + static_cast& (ov::CompiledModel::*)() const>(&ov::CompiledModel::input), + static_cast& 
(ov::CompiledModel::*)(const std::string&)const>( + &ov::CompiledModel::input), + static_cast& (ov::CompiledModel::*)(size_t) const>( + &ov::CompiledModel::input)); + } catch (std::exception& e) { + reportError(info.Env(), e.what() + std::string("inputs.")); + return info.Env().Null(); } } @@ -112,6 +96,26 @@ Napi::Value CompiledModelWrap::get_inputs(const Napi::CallbackInfo& info) { return js_inputs; } +Napi::Value CompiledModelWrap::get_node( + const Napi::CallbackInfo& info, + const ov::Output& (ov::CompiledModel::*func)() const, + const ov::Output& (ov::CompiledModel::*func_tname)(const std::string&)const, + const ov::Output& (ov::CompiledModel::*func_idx)(size_t) const) { + if (info.Length() == 0) { + return Output::wrap(info.Env(), (_compiled_model.*func)()); + } else if (info.Length() != 1) { + OPENVINO_THROW(std::string("Invalid number of arguments.")); + } else if (info[0].IsString()) { + auto tensor_name = info[0].ToString(); + return Output::wrap(info.Env(), (_compiled_model.*func_tname)(tensor_name)); + } else if (info[0].IsNumber()) { + auto idx = info[0].As().Int32Value(); + return Output::wrap(info.Env(), (_compiled_model.*func_idx)(idx)); + } else { + OPENVINO_THROW(std::string("Error while getting compiled model ")); + } +} + Napi::Value CompiledModelWrap::export_model(const Napi::CallbackInfo& info) { std::stringstream _stream; _compiled_model.export_model(_stream); diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 94054315f9c0c6..1b8c6177a26459 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -11,6 +11,7 @@ pytest-timeout==2.2.0 py>=1.9.0 pygments>=2.8.1 setuptools>=65.6.1 +mpmath<1.4 sympy>=1.10 wheel>=0.38.1 patchelf<=0.17.2.1 @@ -18,7 +19,7 @@ patchelf<=0.17.2.1 # Frontends docopt~=0.6.2 paddlepaddle==2.5.2 -tensorflow>=1.15.5,<2.16.0 +tensorflow>=1.15.5,<2.17.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 onnx==1.15.0 diff --git a/src/bindings/python/src/openvino/frontend/pytorch/__init__.py b/src/bindings/python/src/openvino/frontend/pytorch/__init__.py index 7b1cbb471ca5c3..0d730a1f31ffc3 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/__init__.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/__init__.py @@ -13,6 +13,7 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType from openvino.frontend.pytorch.py_pytorch_frontend import ConversionExtensionPytorch as ConversionExtension from openvino.frontend.pytorch.py_pytorch_frontend import OpExtensionPytorch as OpExtension + from openvino.frontend.pytorch.module_extension import ModuleExtension except ImportError as err: raise ImportError("OpenVINO PyTorch frontend is not available, please make sure the frontend is built." 
"{}".format(err)) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py index e29bbd17c8d11d..9d02c409b0a525 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -1,4 +1,3 @@ - # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/src/bindings/python/src/openvino/frontend/pytorch/module_extension.py b/src/bindings/python/src/openvino/frontend/pytorch/module_extension.py new file mode 100644 index 00000000000000..baa22d1dbde3db --- /dev/null +++ b/src/bindings/python/src/openvino/frontend/pytorch/module_extension.py @@ -0,0 +1,39 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# flake8: noqa +# mypy: ignore-errors + +class ModuleExtension: + def __init__(self, module, target_op, evaluate=None, convert=None): + """ + Creates an extension that replaces entire PyTorch module by a single operation. + This functionality works with PyTorch models only. A module can be identified by + module type (e.g. torch.nn.Linear), module instance in the model or module name. + + Args: + module (str, torch.nn.Module, type(torch.nn.Module)): PyTorch module to replace + + target_op (str): a target operation that will be used as a replacer for the module, + could be a name of the extension operation or existing PyTorch operation + (with prim:: or aten:: prefix following TorchScript syntax). + + evaluate (callable with args module, *args, **kwargs): a callable that will replace a target + module in model execution it is responsible for producing valid output for + the module to allow correct model tracing. By default it calls original module + forward with the same arguments. The provided code will not be a part of the final + traced model, it is used only to produce valid results in the tracing. + + convert (callable with args target_op, *args, **kwargs): a callable that will be traced and become + a part of the final model instead of the target module. It accepts target_op as + the first parameter, target_op is callable that will appear as a single node in the + graph, the type of the node is target_op provided as another argument above. 
+ """ + self.module = module + self.target_op = target_op + self.evaluate = evaluate + if self.evaluate is None: + self.evaluate = lambda module, *args, **kwargs: module(*args, **kwargs) + self.convert = convert + if self.convert is None: + self.convert = lambda module, target_op, *args, **kwargs: target_op(*args, **kwargs) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py new file mode 100644 index 00000000000000..a55e031cf2c73f --- /dev/null +++ b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py @@ -0,0 +1,81 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# flake8: noqa +# mypy: ignore-errors + +import torch + + +class no_jit_trace: + def __enter__(self): + self.state = torch._C._get_tracing_state() + torch._C._set_tracing_state(None) + + def __exit__(self, *args): + torch._C._set_tracing_state(self.state) + self.state = None + + +def patch_model(model, module_extensions, orig_forward_name): + def module_patcher(m, name): + extension = None + if m in module_extensions: + extension = module_extensions[m] + elif m.__class__ in module_extensions: + extension = module_extensions[m.__class__] + elif name in module_extensions: + extension = module_extensions[name] + + if extension: + # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module. + class Trampoline(torch.autograd.Function): + target_extension = extension + original_module = m + stashed_args = None + stashed_kwargs = None + + @staticmethod + @torch.jit.ignore + def forward(*args, **kwargs): + with no_jit_trace(): + # `module` is going to be passed to a user-defined function `evaluate` + # `module` is patched: forward function was replaced, and we are actually in this patched function right in this code + # if we pass `module` as-is to the user code below, and it happens to call forward it will lead to infinite recursion or fail + # so we need to temporary patch the module back to the original forward and then return it back again + # stash the current forward to be able to return it back + patched_forward = m.forward + # set original forward for the module + m.forward = getattr(m, orig_forward_name) + # call user code + results = extension.evaluate( + m, *Trampoline.stashed_args, **Trampoline.stashed_kwargs) # call user code + m.forward = patched_forward # return patched forward back + return results + + def new_forward(*args, **kwargs): + Trampoline.stashed_args = args + Trampoline.stashed_kwargs = kwargs + return extension.convert(m, Trampoline.apply, *args, **kwargs) + setattr(m, orig_forward_name, m.forward) + m.forward = new_forward + + for name, m in model.named_modules(): + if hasattr(m, orig_forward_name): + # already patched, skipping with a warning because it is unexpected + print(f'[ WARNING ] Unexpectedly found already patched module {name} while applying ModuleExtension during PyTorch model conversion. ' + 'Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.') + continue + module_patcher(m, name) + + +def unpatch_model(model, orig_forward_name): + for _, m in model.named_modules(): + if hasattr(m, orig_forward_name): + try: + m.forward = getattr(m, orig_forward_name) + delattr(m, orig_forward_name) + except Exception as error: + print('[ WARNING ] Exception raised during model unpatching. 
Depending on the exact issue it may lead to broken original model.') + print('Original exception details:') + print(error) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py index 0a7bebd8763215..a1071c1af0e3b8 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py @@ -29,104 +29,217 @@ class OperatorSupport(OperatorSupport): def __init__(self, options): support_dict = { "_operator.getitem": None, + "torch.ops.aten._adaptive_avg_pool1d.default": None, "torch.ops.aten._adaptive_avg_pool2d.default": None, + "torch.ops.aten._adaptive_avg_pool3d.default": None, + "torch.ops.aten._convolution.default": None, + "torch.ops.aten._embedding_bag.default": None, + "torch.ops.aten._fake_quantize_per_tensor_affine_cachemask_tensor_qparams.default": None, + "torch.ops.aten._local_scalar_dense.default": None, "torch.ops.aten._log_softmax.default": None, + "torch.ops.aten._native_batch_norm_legit.default": None, + "torch.ops.aten._native_batch_norm_legit.no_stats": None, + "torch.ops.aten._native_batch_norm_legit_functional.default": None, + "torch.ops.aten._native_batch_norm_legit_no_training.default": None, + "torch.ops.aten._scaled_dot_product_flash_attention.default": None, + "torch.ops.aten._scaled_dot_product_flash_attention_for_cpu.default": None, "torch.ops.aten._softmax.default": None, "torch.ops.aten._to_copy.default": None, "torch.ops.aten._unsafe_view.default": None, - "torch.ops.aten._unsafe_view.default": None, + "torch.ops.aten.abs.default": None, + "torch.ops.aten.acos.default": None, + "torch.ops.aten.acosh.default": None, + "torch.ops.aten.adaptive_max_pool1d.default": None, + "torch.ops.aten.adaptive_max_pool2d.default": None, + "torch.ops.aten.adaptive_max_pool3d.default": None, "torch.ops.aten.add.Scalar": None, "torch.ops.aten.add.Tensor": None, "torch.ops.aten.add_.Tensor": None, + "torch.ops.aten.addcmul.default": None, "torch.ops.aten.addmm.default": None, + "torch.ops.aten.alias.default": None, + "torch.ops.aten.all.default": None, "torch.ops.aten.amax.default": None, - "torch.ops.aten.arange.start": None, + "torch.ops.aten.amin.default": None, + "torch.ops.aten.any.default": None, + "torch.ops.aten.any.dim": None, "torch.ops.aten.arange.default": None, + "torch.ops.aten.arange.start": None, + "torch.ops.aten.arange.start_step": None, "torch.ops.aten.argmax.default": None, + "torch.ops.aten.argmin.default": None, + "torch.ops.aten.as_strided.default": None, + "torch.ops.aten.asin.default": None, + "torch.ops.aten.asinh.default": None, + "torch.ops.aten.asinh.default": None, + "torch.ops.aten.atanh.default": None, "torch.ops.aten.avg_pool2d.default": None, + "torch.ops.aten.avg_pool3d.default": None, "torch.ops.aten.baddbmm.default": None, "torch.ops.aten.bitwise_and.Tensor": None, + "torch.ops.aten.bitwise_not.default": None, + "torch.ops.aten.bitwise_or.Tensor": None, + "torch.ops.aten.bitwise_xor.Tensor": None, "torch.ops.aten.bmm.default": None, "torch.ops.aten.cat.default": None, + "torch.ops.aten.ceil.default": None, + "torch.ops.aten.clamp.default": None, + "torch.ops.aten.clamp_max.default": None, + "torch.ops.aten.clamp_max.Tensor": None, "torch.ops.aten.clamp_min.default": None, + "torch.ops.aten.clamp_min.Tensor": None, "torch.ops.aten.clone.default": None, + "torch.ops.aten.constant_pad_nd.default": None, 
"torch.ops.aten.convolution.default": None, + "torch.ops.aten.copy.default": None, "torch.ops.aten.copy_.default": None, "torch.ops.aten.cos.default": None, + "torch.ops.aten.cosh.default": None, "torch.ops.aten.cumsum.default": None, "torch.ops.aten.detach.default": None, + "torch.ops.aten.detach_.default": None, "torch.ops.aten.div.Scalar": None, "torch.ops.aten.div.Tensor": None, + "torch.ops.aten.div.Tensor_mode": None, + "torch.ops.aten.div_.Tensor": None, + "torch.ops.aten.elu.default": None, + "torch.ops.aten.elu_.default": None, "torch.ops.aten.embedding.default": None, "torch.ops.aten.empty.memory_format": None, - "torch.ops.aten.erf.default": None, "torch.ops.aten.eq.Scalar": None, "torch.ops.aten.eq.Tensor": None, + "torch.ops.aten.erf.default": None, "torch.ops.aten.exp.default": None, "torch.ops.aten.expand.default": None, + "torch.ops.aten.fake_quantize_per_channel_affine_cachemask.default": None, "torch.ops.aten.fill.Scalar": None, + "torch.ops.aten.fill_.Scalar": None, + "torch.ops.aten.fill.Tensor": None, + "torch.ops.aten.fill_.Tensor": None, + "torch.ops.aten.flip.default": None, + "torch.ops.aten.floor.default": None, + "torch.ops.aten.floor.default": None, + "torch.ops.aten.fmod.Scalar": None, + "torch.ops.aten.fmod.Tensor": None, "torch.ops.aten.full.default": None, + "torch.ops.aten.full.names": None, + "torch.ops.aten.full_like.default": None, "torch.ops.aten.gather.default": None, + "torch.ops.aten.ge.Scalar": None, + "torch.ops.aten.ge.Tensor": None, "torch.ops.aten.gelu.default": None, + "torch.ops.aten.glu.default": None, + "torch.ops.aten.grid_sampler_2d.default": None, "torch.ops.aten.gt.Scalar": None, + "torch.ops.aten.gt.Tensor": None, "torch.ops.aten.hardsigmoid.default": None, + "torch.ops.aten.hardswish.default": None, "torch.ops.aten.hardswish_.default": None, + "torch.ops.aten.hardtanh.default": None, "torch.ops.aten.hardtanh_.default": None, "torch.ops.aten.index.Tensor": None, + "torch.ops.aten.index_select.default": None, + "torch.ops.aten.isfinite.default": None, + "torch.ops.aten.isinf.default": None, + "torch.ops.aten.isnan.default": None, + "torch.ops.aten.le.Scalar": None, + "torch.ops.aten.le.Tensor": None, + "torch.ops.aten.leaky_relu.default": None, "torch.ops.aten.leaky_relu_.default": None, "torch.ops.aten.lift_fresh_copy.default": None, "torch.ops.aten.linalg_vector_norm.default": None, - "torch.ops.aten.lt.Tensor": None, "torch.ops.aten.log.default": None, "torch.ops.aten.log_sigmoid_forward.default": None, + "torch.ops.aten.log10.default": None, + "torch.ops.aten.log1p.default": None, + "torch.ops.aten.log2.default": None, + "torch.ops.aten.logical_not.default": None, "torch.ops.aten.logsumexp.default": None, - "torch.ops.aten.masked_fill_.Scalar": None, + "torch.ops.aten.lt.Scalar": None, + "torch.ops.aten.lt.Tensor": None, + "torch.ops.aten.masked_fill.Scalar": None, "torch.ops.aten.masked_fill.Tensor": None, + "torch.ops.aten.masked_fill_.Scalar": None, + "torch.ops.aten.masked_fill_.Tensor": None, + "torch.ops.aten.max.default": None, "torch.ops.aten.max.dim": None, "torch.ops.aten.max_pool2d_with_indices.default": None, + "torch.ops.aten.max_pool3d_with_indices.default": None, + "torch.ops.aten.maximum.default": None, + "torch.ops.aten.mean.default": None, "torch.ops.aten.mean.dim": None, + "torch.ops.aten.min.default": None, + "torch.ops.aten.min.dim": None, + "torch.ops.aten.minimum.default": None, "torch.ops.aten.mm.default": None, "torch.ops.aten.mul.Scalar": None, "torch.ops.aten.mul.Tensor": None, 
"torch.ops.aten.native_batch_norm.default": None, - "torch.ops.aten._native_batch_norm_legit.default": None, - "torch.ops.aten._native_batch_norm_legit_no_training.default": None, + "torch.ops.aten.native_dropout.default": None, "torch.ops.aten.native_group_norm.default": None, "torch.ops.aten.native_layer_norm.default": None, - "torch.ops.aten.new_full.default": None, + "torch.ops.aten.ne.Scalar": None, + "torch.ops.aten.ne.Tensor": None, "torch.ops.aten.neg.default": None, + "torch.ops.aten.new_full.default": None, "torch.ops.aten.new_ones.default": None, + "torch.ops.aten.new_zeros.default": None, + "torch.ops.aten.ones.default": None, "torch.ops.aten.permute.default": None, + "torch.ops.aten.pow.Scalar": None, "torch.ops.aten.pow.Tensor_Scalar": None, + "torch.ops.aten.pow.Tensor_Tensor": None, + "torch.ops.aten.rand.default": None, + "torch.ops.aten.reciprocal.default": None, "torch.ops.aten.relu.default": None, "torch.ops.aten.relu_.default": None, + "torch.ops.aten.repeat.default": None, + "torch.ops.aten.roll.default": None, "torch.ops.aten.rsqrt.default": None, "torch.ops.aten.rsub.Scalar": None, - "torch.ops.aten._scaled_dot_product_flash_attention.default": None, + "torch.ops.aten.rsub.Tensor": None, "torch.ops.aten.scalar_tensor.default": None, + "torch.ops.aten.scatter.src": None, + "torch.ops.aten.scatter.value": None, "torch.ops.aten.select.int": None, + "torch.ops.aten.select_scatter.default": None, "torch.ops.aten.sigmoid.default": None, + "torch.ops.aten.sign.default": None, "torch.ops.aten.silu.default": None, "torch.ops.aten.silu_.default": None, "torch.ops.aten.sin.default": None, + "torch.ops.aten.sinh.default": None, "torch.ops.aten.slice.Tensor": None, + "torch.ops.aten.slice_scatter.default": None, + "torch.ops.aten.sort.default": None, "torch.ops.aten.split.Tensor": None, + "torch.ops.aten.split_with_sizes.default": None, + "torch.ops.aten.sqrt.default": None, "torch.ops.aten.squeeze.dim": None, "torch.ops.aten.squeeze.dims": None, "torch.ops.aten.stack.default": None, "torch.ops.aten.sub.default": None, "torch.ops.aten.sub.Tensor": None, + "torch.ops.aten.sum.default": None, "torch.ops.aten.sum.dim_IntList": None, "torch.ops.aten.t.default": None, + "torch.ops.aten.tan.default": None, "torch.ops.aten.tanh.default": None, + "torch.ops.aten.topk.default": None, "torch.ops.aten.transpose.int": None, + "torch.ops.aten.tril.default": None, + "torch.ops.aten.tril_.default": None, "torch.ops.aten.unbind.int": None, + "torch.ops.aten.unfold.default": None, "torch.ops.aten.unsqueeze.default": None, "torch.ops.aten.upsample_nearest2d.default": None, + "torch.ops.aten.var.correction": None, "torch.ops.aten.var_mean.correction": None, "torch.ops.aten.view.default": None, "torch.ops.aten.where.self": None, "torch.ops.aten.zeros_like.default": None, + "torch.ops.torchvision.deform_conv2d.default": None, + "torch.ops.torchvision.roi_align.default": None, } for op in _get_disabled_ops(options): diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index 15b48c1c579b91..48cf6136b7afc9 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -10,13 +10,24 @@ from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, prepare_example_inputs_and_model, convert_quantized_tensor, graph_has_ops from openvino.runtime import opset11 as ops from 
openvino.frontend.pytorch import gptq +from openvino.frontend.pytorch import patch_model +from openvino.frontend.pytorch.module_extension import ModuleExtension import typing import torch class TorchScriptPythonDecoder (Decoder): - def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=None, shared_memory=True, skip_freeze=False, constant_cache=None): + def __init__( + self, + pt_module, + graph_element=None, + example_input=None, + alias_db=None, + shared_memory=True, + skip_freeze=False, + constant_cache=None, + module_extensions=None): Decoder.__init__(self) # We store every decoder created by this decoder so that all them are not deleted until the first decoder is deleted self.m_decoders = [] @@ -24,6 +35,7 @@ def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=N self._shared_memory = shared_memory self._input_is_list = False self.constant_cache = constant_cache if constant_cache is not None else dict() + self.module_extensions = module_extensions if graph_element is None: try: pt_module = self._get_scripted_model( @@ -89,14 +101,22 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False) input_params = inspect.signature(pt_module.forward if hasattr( pt_module, "forward") else pt_module.__call__).parameters input_signature = list(input_params) + if example_inputs is None: + if self.module_extensions: + raise RuntimeError("ModuleExtension is not supported for scripting. Please provide valid example_input argument to run tracing.") scripted = torch.jit.script(pt_module) freeze_by_default = True else: input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model( example_inputs, input_params, pt_module) - gptq_patched = False + # name of attribute in a patched module where the original forward method is kept + orig_forward_name = '_openvino_module_extension_patch_orig_forward' + if self.module_extensions: + patch_model.patch_model(pt_module, self.module_extensions, orig_forward_name) + + gptq_patched = False if gptq.detect_gptq_model(pt_module): try: gptq.patch_model(pt_module) @@ -115,6 +135,8 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False) finally: if gptq_patched: gptq.unpatch_model(pt_module) + if self.module_extensions: + patch_model.unpatch_model(pt_module, orig_forward_name) if not freeze_by_default and graph_has_ops(scripted.inlined_graph, ["prim::Uninitialized", "prim::unchecked_cast", "aten::append"]): # freeze models with unsupported ops @@ -232,7 +254,8 @@ def visit_subgraph(self, node_visitor) -> None: node, alias_db=self.alias_db, shared_memory=self._shared_memory, - constant_cache=self.constant_cache) + constant_cache=self.constant_cache, + module_extensions=self.module_extensions) self.m_decoders.append(decoder) node_visitor(decoder) @@ -255,13 +278,28 @@ def get_subgraph_decoder(self, index: int): decoder = TorchScriptPythonDecoder(self.pt_module, self.get_subgraphs()[index], alias_db=self.alias_db, - shared_memory=self._shared_memory) + shared_memory=self._shared_memory, + module_extensions=self.module_extensions) self.m_decoders.append(decoder) return decoder def get_op_type(self) -> str: assert isinstance( self.graph_element, torch.Node), "Function can be called only when self.graph_element is of type torch.Node" + if self.graph_element.kind() == "prim::PythonOp": + if hasattr(self.graph_element, 'pyobj') and callable(self.graph_element.pyobj) and hasattr(self.graph_element.pyobj(), '__self__'): + trampoline = 
self.graph_element.pyobj().__self__ + if hasattr(trampoline, 'target_extension') and isinstance(trampoline.target_extension, ModuleExtension): + target_op = trampoline.target_extension.target_op + if callable(target_op): + target = target_op(trampoline.original_module) + elif isinstance(target_op, str): + target = target_op + # TODO: Support target as a callable that will play a role of ConversionExtension for an entire module instead of a single op. + # Without supporting target as a callable here, ConversionExtension functionality is still possible to implement + # by combining two extensions: ModuleExtension that use temporary name as a target op and another extension of type ConversionExtension + # that translates that particular temporary name to custom graph. But providing conversion code as a callable `target` is more convenient. + return target return self.graph_element.kind() def get_schema(self) -> str: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 157592f3aabee0..599ea7398a1488 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -1,4 +1,3 @@ - # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/src/bindings/python/src/openvino/properties/hint/__init__.py b/src/bindings/python/src/openvino/properties/hint/__init__.py index 5ff211301f9c74..1624325ea5e9e2 100644 --- a/src/bindings/python/src/openvino/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/properties/hint/__init__.py @@ -5,6 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType +from openvino._pyopenvino.properties.hint import ModelDistributionPolicy from openvino._pyopenvino.properties.hint import ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode diff --git a/src/bindings/python/src/openvino/runtime/opset14/__init__.py b/src/bindings/python/src/openvino/runtime/opset14/__init__.py index c43adc3c50a77c..52ac785bd723e5 100644 --- a/src/bindings/python/src/openvino/runtime/opset14/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset14/__init__.py @@ -14,7 +14,7 @@ from openvino.runtime.opset6.ops import assign from openvino.runtime.opset1.ops import atan from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool +from openvino.runtime.opset14.ops import avg_pool from openvino.runtime.opset5.ops import batch_norm_inference from openvino.runtime.opset2.ops import batch_to_space from openvino.runtime.opset1.ops import binary_convolution @@ -103,7 +103,7 @@ from openvino.runtime.opset5.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset8.ops import max_pool +from openvino.runtime.opset14.ops import max_pool from openvino.runtime.opset1.ops import maximum from openvino.runtime.opset1.ops import minimum from openvino.runtime.opset4.ops import mish diff --git a/src/bindings/python/src/openvino/runtime/opset14/ops.py b/src/bindings/python/src/openvino/runtime/opset14/ops.py index 482b2a0d7c2c9b..fb70f1c9588f12 100644 --- a/src/bindings/python/src/openvino/runtime/opset14/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset14/ops.py @@ -4,17 +4,20 @@ """Factory functions for ops added to openvino opset14.""" from functools 
import partial -from typing import Union + +from typing import Union, Optional, List from openvino.runtime import Node, Type from openvino.runtime.opset_utils import _get_node_factory +from openvino.runtime.utils.types import TensorShape from openvino.runtime.utils.decorators import nameable_op -from openvino.runtime.utils.types import NodeInput, as_nodes +from openvino.runtime.utils.types import NodeInput, as_node, as_nodes _get_node_factory_opset14 = partial(_get_node_factory, "opset14") # -------------------------------------------- ops ------------------------------------------------ + @nameable_op def convert_promote_types( left_node: NodeInput, @@ -62,3 +65,103 @@ def inverse( } return _get_node_factory_opset14().create("Inverse", inputs, attributes) + + +@nameable_op +def max_pool( + data: NodeInput, + strides: List[int], + dilations: List[int], + pads_begin: List[int], + pads_end: List[int], + kernel_shape: TensorShape, + rounding_type: str = "floor", + auto_pad: Optional[str] = None, + index_element_type: Optional[Union[str, Type]] = "i64", + axis: Optional[int] = 0, + name: Optional[str] = None, +) -> Node: + """Perform max pooling operation and return both values and indices of the selected elements. + + :param data: The node providing input data. + :param strides: The distance (in pixels) to slide the filter on the feature map + over the axes. + :param dilations: The dilation of filter elements(distance between elements). + :param pads_begin: The number of pixels to add at the beginning along each axis. + :param pads_end: The number of pixels to add at the end along each axis. + :param kernel_shape: The pooling operation kernel shape. + :param rounding_type: Determines used rounding schema when computing output shape. + Acceptable values are: ['floor', 'ceil', 'ceil_torch']. Defaults to 'floor'. + :param auto_pad: Determines how the padding is calculated. Acceptable values: + [None, 'same_upper', 'same_lower', 'valid']. Defaults to None. + :param index_element_type: The data type used for the indices output of this operator. + Defaults to i64. + :param axis: The first dimension in the data shape used to determine the maximum + returned index value. The value is the product of all dimensions + starting at the provided axis. Defaults to 0. + :param name: The optional name for the created output node. + + :return: The new node performing max pooling operation. + """ + if auto_pad is None: + auto_pad = "explicit" + return _get_node_factory_opset14().create( + "MaxPool", + [as_node(data)], + { + "strides": strides, + "dilations": dilations, + "pads_begin": pads_begin, + "pads_end": pads_end, + "kernel": kernel_shape, + "rounding_type": rounding_type.upper(), + "auto_pad": auto_pad.upper(), + "index_element_type": index_element_type, + "axis": axis, + }, + ) + + +@nameable_op +def avg_pool( + data_batch: NodeInput, + strides: List[int], + pads_begin: TensorShape, + pads_end: TensorShape, + kernel_shape: TensorShape, + exclude_pad: bool, + rounding_type: str = "floor", + auto_pad: Optional[str] = None, + name: Optional[str] = None, +) -> Node: + """Return average pooling node. + + :param data_batch: The input node providing data. + :param strides: The window movement strides. + :param pads_begin: The number of pixels to add at the beginning along each axis. + :param pads_end: The number of pixels to add at the end along each axis. + :param kernel_shape: The pooling window shape. + :param exclude_pad: Whether or not to include zero padding in average computations. 
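+        (True excludes the padded zeros from the averaging divisor).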
+ :param rounding_type: Determines used rounding schema when computing output shape. Acceptable + values are: ['floor', 'ceil', 'ceil_torch']. Defaults to 'floor'. + :param auto_pad: Determines how the padding is calculated. Acceptable values: + [None, 'same_upper', 'same_lower', 'valid']. Defaults to None. + :param name: Optional name for the new output node. + + :return: New node with AvgPool operation applied on its data. + """ + if auto_pad is None: + auto_pad = "explicit" + return _get_node_factory_opset14().create( + "AvgPool", + [as_node(data_batch)], + { + "strides": strides, + "pads_begin": pads_begin, + "pads_end": pads_end, + "kernel": kernel_shape, + "exclude-pad": exclude_pad, + "rounding_type": rounding_type.upper(), + "auto_pad": auto_pad.upper(), + }, + ) diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py index cce898891e4af3..dd90ded374ca11 100644 --- a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py @@ -5,6 +5,7 @@ # Enums from openvino._pyopenvino.properties.hint import Priority from openvino._pyopenvino.properties.hint import SchedulingCoreType +from openvino._pyopenvino.properties.hint import ModelDistributionPolicy from openvino._pyopenvino.properties.hint import ExecutionMode from openvino._pyopenvino.properties.hint import PerformanceMode @@ -14,6 +15,7 @@ from openvino._pyopenvino.properties.hint import performance_mode from openvino._pyopenvino.properties.hint import enable_cpu_pinning from openvino._pyopenvino.properties.hint import scheduling_core_type +from openvino._pyopenvino.properties.hint import model_distribution_policy from openvino._pyopenvino.properties.hint import enable_hyper_threading from openvino._pyopenvino.properties.hint import execution_mode from openvino._pyopenvino.properties.hint import num_requests diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 6ed59721c59d88..f1edeaa18ff1ef 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -71,6 +71,9 @@ void regmodule_properties(py::module m) { .value("PCORE_ONLY", ov::hint::SchedulingCoreType::PCORE_ONLY) .value("ECORE_ONLY", ov::hint::SchedulingCoreType::ECORE_ONLY); + py::enum_(m_hint, "ModelDistributionPolicy", py::arithmetic()) + .value("TENSOR_PARALLEL", ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL); + py::enum_(m_hint, "ExecutionMode", py::arithmetic()) .value("PERFORMANCE", ov::hint::ExecutionMode::PERFORMANCE) .value("ACCURACY", ov::hint::ExecutionMode::ACCURACY); @@ -81,6 +84,7 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode"); wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning"); wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type"); + wrap_property_RW(m_hint, ov::hint::model_distribution_policy, "model_distribution_policy"); wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading"); wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode"); wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests"); diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index 
d743c9d31ce32a..ffbcf3e4ac730f 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -176,6 +177,8 @@ py::object from_ov_any(const ov::Any& any) { return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); + } else if (any.is>()) { + return py::cast(any.as>()); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -375,6 +378,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); + } else if (py::isinstance>(py_obj)) { + return py::cast>(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { diff --git a/src/bindings/python/tests/test_graph/test_pooling.py b/src/bindings/python/tests/test_graph/test_pooling.py index 746c580a4ea00a..9df293692f910f 100644 --- a/src/bindings/python/tests/test_graph/test_pooling.py +++ b/src/bindings/python/tests/test_graph/test_pooling.py @@ -5,263 +5,230 @@ import numpy as np import pytest -import openvino.runtime.opset8 as ov +import openvino.runtime.opset14 as ov from openvino import Type -@pytest.fixture() -def ndarray_1x1x4x4(): - return np.arange(11, 27, dtype=np.float32).reshape(1, 1, 4, 4) - - -def test_avg_pool_2d(ndarray_1x1x4x4): - input_data = ndarray_1x1x4x4 - param = ov.parameter(input_data.shape, name="A", dtype=np.float32) - - kernel_shape = [2, 2] - spatial_dim_count = len(kernel_shape) - pads_begin = [0] * spatial_dim_count - pads_end = [0] * spatial_dim_count - strides = [2, 2] - exclude_pad = True - - node = ov.avg_pool(param, strides, pads_begin, pads_end, kernel_shape, exclude_pad) - assert node.get_type_name() == "AvgPool" - assert node.get_output_size() == 1 - assert list(node.get_output_shape(0)) == [1, 1, 2, 2] - assert node.get_output_element_type(0) == Type.f32 - - -def test_avg_pooling_3d(ndarray_1x1x4x4): - data = ndarray_1x1x4x4 - data = np.broadcast_to(data, (1, 1, 4, 4, 4)) - param = ov.parameter(list(data.shape)) - kernel_shape = [2, 2, 2] - strides = [2, 2, 2] - spatial_dim_count = len(kernel_shape) - pads_begin = [0] * spatial_dim_count - pads_end = [0] * spatial_dim_count - exclude_pad = True - - node = ov.avg_pool(param, strides, pads_begin, pads_end, kernel_shape, exclude_pad) +avg_pooling_test_params = [ + ( + [ + [2, 2], # strides + [0, 0], # pads_begin + [0, 0], # pads_end + [2, 2], # kernel_shape + True, # exclude_pad + "floor", # rounding_type + ], + [1, 1, 4, 4], # input_shape + [1, 1, 2, 2], # expected_output_shape + ), + ( + [ + [2, 2], # strides + [1, 1], # pads_begin + [1, 1], # pads_end + [2, 2], # kernel_shape + False, # exclude_pad + "ceil_torch", # rounding_type + ], + [1, 1, 5, 5], # input_shape + [1, 1, 3, 3], # expected_output_shape + ), + ( + [ + [2, 2], # strides + [1, 1], # pads_begin + [1, 1], # pads_end + [2, 2], # kernel_shape + False, # exclude_pad + "ceil_torch", # rounding_type + ], + [1, 3, 9, 9], # input_shape + [1, 3, 5, 5], # expected_output_shape + ), + ( + [ + [2, 2], # strides + [0, 0], # pads_begin + [0, 0], # pads_end + [3, 3], # kernel_shape + False, # exclude_pad + "ceil_torch", # rounding_type + ], + [1, 3, 10, 10], # input_shape + [1, 3, 5, 5], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [0, 0], # pads_begin + [0, 0], # pads_end + [3, 3], # kernel_shape + False, # exclude_pad + "ceil_torch", # rounding_type + ], + [1, 3, 10, 10], # input_shape + [1, 
3, 8, 8], # expected_output_shape + ), + ( + [ + [2, 2, 2], # strides + [0, 0, 0], # pads_begin + [0, 0, 0], # pads_end + [2, 2, 2], # kernel_shape + True, # exclude_pad + "ceil_torch", # rounding_type + ], + [1, 1, 4, 4, 4], # input_shape + [1, 1, 2, 2, 2], # expected_output_shape + ), +] + + +max_pooling_test_params = [ + ( + [ + [1, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [2, 2], # kernel_shape + "floor", # rounding_type + None, # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 3, 3], # expected_output_shape + ), + ( + [ + [2, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [2, 2], # kernel_shape + "floor", # rounding_type + None, # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 2, 3], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [1, 1], # kernel_shape + "floor", # rounding_type + None, # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 4, 4], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [3, 3], # kernel_shape + "floor", # rounding_type + None, # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 2, 2], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [1, 1], # dilations + [1, 1], # pads_begin + [1, 1], # pads_end + [2, 2], # kernel_shape + "floor", # rounding_type + None, # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 5, 5], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [2, 2], # kernel_shape + "floor", # rounding_type + "same_upper", # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 4, 4], # expected_output_shape + ), + ( + [ + [2, 2], # strides + [1, 1], # dilations + [1, 1], # pads_begin + [1, 1], # pads_end + [2, 2], # kernel_shape + "ceil_torch", # rounding_type + None, # auto_pad + ], + [1, 1, 5, 5], # input_shape + [1, 1, 3, 3], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [2, 2], # kernel_shape + "ceil_torch", # rounding_type + "same_lower", # auto_pad + ], + [1, 1, 4, 4], # input_shape + [1, 1, 4, 4], # expected_output_shape + ), + ( + [ + [1, 1], # strides + [1, 1], # dilations + [0, 0], # pads_begin + [0, 0], # pads_end + [3, 3], # kernel_shape + "ceil_torch", # rounding_type + None, # auto_pad + ], + [1, 1, 10, 10], # input_shape + [1, 1, 8, 8], # expected_output_shape + ), +] + + +@pytest.mark.parametrize( + ("op_params", "input_shape", "expected_output_shape"), + avg_pooling_test_params, +) +def test_avg_pool(op_params, input_shape, expected_output_shape): + param = ov.parameter(input_shape, name="A", dtype=np.float32) + node = ov.avg_pool(param, *op_params) assert node.get_type_name() == "AvgPool" assert node.get_output_size() == 1 - assert list(node.get_output_shape(0)) == [1, 1, 2, 2, 2] + assert list(node.get_output_shape(0)) == expected_output_shape assert node.get_output_element_type(0) == Type.f32 -def test_max_pool_basic(): - data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [1, 1] - dilations = [1, 1] - pads_begin = [0, 0] - pads_end = [0, 0] - kernel_shape = [2, 2] - rounding_type = "floor" - auto_pad = None - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - 
rounding_type, - auto_pad, - index_et, - ) - assert node.get_type_name() == "MaxPool" - assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 3, 3] - assert list(node.get_output_shape(1)) == [1, 1, 3, 3] - assert node.get_output_element_type(0) == Type.f32 - assert node.get_output_element_type(1) == Type.i32 - - -def test_max_pool_strides(): - data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [2, 1] - dilations = [1, 1] - pads_begin = [0, 0] - pads_end = [0, 0] - kernel_shape = [2, 2] - rounding_type = "floor" - auto_pad = None - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - rounding_type, - auto_pad, - index_et, - ) - assert node.get_type_name() == "MaxPool" - assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 2, 3] - assert list(node.get_output_shape(1)) == [1, 1, 2, 3] - assert node.get_output_element_type(0) == Type.f32 - assert node.get_output_element_type(1) == Type.i32 - - -def test_max_pool_kernel_shape1x1(): - data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [1, 1] - dilations = [1, 1] - pads_begin = [0, 0] - pads_end = [0, 0] - kernel_shape = [1, 1] - rounding_type = "floor" - auto_pad = None - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - rounding_type, - auto_pad, - index_et, - ) - assert node.get_type_name() == "MaxPool" - assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 4, 4] - assert list(node.get_output_shape(1)) == [1, 1, 4, 4] - assert node.get_output_element_type(0) == Type.f32 - assert node.get_output_element_type(1) == Type.i32 - - -def test_max_pool_kernel_shape3x3(): - data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [1, 1] - dilations = [1, 1] - pads_begin = [0, 0] - pads_end = [0, 0] - kernel_shape = [3, 3] - rounding_type = "floor" - auto_pad = None - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - rounding_type, - auto_pad, - index_et, - ) - assert node.get_type_name() == "MaxPool" - assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 2, 2] - assert list(node.get_output_shape(1)) == [1, 1, 2, 2] - assert node.get_output_element_type(0) == Type.f32 - assert node.get_output_element_type(1) == Type.i32 - - -def test_max_pool_non_zero_pads(): - data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [1, 1] - dilations = [1, 1] - pads_begin = [1, 1] - pads_end = [1, 1] - kernel_shape = [2, 2] - rounding_type = "floor" - auto_pad = None - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - rounding_type, - auto_pad, - index_et, - ) - assert node.get_type_name() == "MaxPool" - assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 5, 5] - assert list(node.get_output_shape(1)) == [1, 1, 5, 5] - assert node.get_output_element_type(0) == Type.f32 - assert node.get_output_element_type(1) == Type.i32 - - -def test_max_pool_same_upper_auto_pads(): - data = 
np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [1, 1] - dilations = [1, 1] - pads_begin = [0, 0] - pads_end = [0, 0] - kernel_shape = [2, 2] - auto_pad = "same_upper" - rounding_type = "floor" - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - rounding_type, - auto_pad, - index_et, - ) - assert node.get_type_name() == "MaxPool" - assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 4, 4] - assert list(node.get_output_shape(1)) == [1, 1, 4, 4] - assert node.get_output_element_type(0) == Type.f32 - assert node.get_output_element_type(1) == Type.i32 - - -def test_max_pool_same_lower_auto_pads(): - data = np.arange(0.5, 16, dtype=np.float32).reshape((1, 1, 4, 4)) - strides = [1, 1] - dilations = [1, 1] - pads_begin = [0, 0] - pads_end = [0, 0] - kernel_shape = [2, 2] - auto_pad = "same_lower" - rounding_type = "floor" - index_et = "i32" - - data_node = ov.parameter(data.shape, name="A", dtype=np.float32) - node = ov.max_pool( - data_node, - strides, - dilations, - pads_begin, - pads_end, - kernel_shape, - rounding_type, - auto_pad, - index_et, - ) +@pytest.mark.parametrize( + ("op_params", "input_shape", "expected_output_shape"), + max_pooling_test_params, +) +def test_max_pool(op_params, input_shape, expected_output_shape): + data_node = ov.parameter(input_shape, name="A", dtype=np.float32) + node = ov.max_pool(data_node, *op_params, "i32") assert node.get_type_name() == "MaxPool" assert node.get_output_size() == 2 - assert list(node.get_output_shape(0)) == [1, 1, 4, 4] - assert list(node.get_output_shape(1)) == [1, 1, 4, 4] + assert list(node.get_output_shape(0)) == expected_output_shape + assert list(node.get_output_shape(1)) == expected_output_shape assert node.get_output_element_type(0) == Type.f32 assert node.get_output_element_type(1) == Type.i32 diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index c637c483d86478..d4ad725679a351 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -86,6 +86,12 @@ def test_properties_rw_base(): (hints.SchedulingCoreType.ECORE_ONLY, "SchedulingCoreType.ECORE_ONLY", 2), ), ), + ( + hints.ModelDistributionPolicy, + ( + (hints.ModelDistributionPolicy.TENSOR_PARALLEL, "ModelDistributionPolicy.TENSOR_PARALLEL", 0), + ), + ), ( hints.ExecutionMode, ( @@ -279,6 +285,13 @@ def test_properties_ro(ov_property_ro, expected_value): "SCHEDULING_CORE_TYPE", ((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),), ), + ( + hints.model_distribution_policy, + "MODEL_DISTRIBUTION_POLICY", + ( + ({hints.ModelDistributionPolicy.TENSOR_PARALLEL}, {hints.ModelDistributionPolicy.TENSOR_PARALLEL}), + ), + ), ( hints.enable_hyper_threading, "ENABLE_HYPER_THREADING", diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 70224751f1f810..fa32b77e027423 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -257,16 +257,10 @@ auto Subgraph::wrap_node_as_subgraph(const std::shared_ptr& node) -> s } void Subgraph::fill_empty_output_names(const Output& target_output_node, const Output& replacement_output_node) { - OPENVINO_SUPPRESS_DEPRECATED_START auto& out_tensor = target_output_node.get_tensor(); - const 
std::string new_name = ov::op::util::get_ie_output_name(replacement_output_node); - if (ov::descriptor::get_ov_tensor_legacy_name(out_tensor).empty()) { - ov::descriptor::set_ov_tensor_legacy_name(out_tensor, new_name); - } if (!replacement_output_node.get_names().empty()) { out_tensor.set_names(replacement_output_node.get_names()); } - OPENVINO_SUPPRESS_DEPRECATED_END } auto Subgraph::constant_input_should_be_inside_body(const std::shared_ptr& node) -> bool { diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp index 1cd38b4caa0b37..3252882472ffec 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp @@ -10,6 +10,7 @@ #include "openvino/core/rt_info.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/convert.hpp" +#include "openvino/op/if.hpp" #include "openvino/op/non_max_suppression.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/slice.hpp" @@ -18,7 +19,9 @@ #include "openvino/op/unsqueeze.hpp" #include "openvino/op/util/broadcast_base.hpp" #include "openvino/op/util/gather_base.hpp" +#include "openvino/op/util/multi_subgraph_base.hpp" #include "openvino/op/variadic_split.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/rt_info/nms_selected_indices.hpp" @@ -60,14 +63,53 @@ class PropagateNMSPath : public pass::MatcherPass { ov::op::v1::VariadicSplit, op::util::GatherBase, ov::op::v0::Concat, - ov::op::v0::Convert>(); + ov::op::v0::Convert, + ov::op::v8::If>(); matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto propagate_path = [](const ov::OutputVector& input_nodes, ov::Node* target_node) { + if (any_of(input_nodes.begin(), input_nodes.end(), [](const Output& output) { + return ov::has_nms_selected_indices(output.get_node()); + })) { + ov::set_nms_selected_indices(target_node); + } + }; + auto handle_params = [&propagate_path](std::shared_ptr node, + std::shared_ptr body, + int body_index) { + const auto& params = body->get_parameters(); + for (auto input_desc : node->get_input_descriptions(body_index)) { + auto param = params[input_desc->m_body_parameter_index]; + auto input_node = node->input(input_desc->m_input_index).get_source_output(); + propagate_path({input_node}, param.get()); + } + }; + auto handle_results = [&propagate_path](std::shared_ptr node, + std::shared_ptr body, + int body_index) { + const auto& results = body->get_results(); + for (auto output_desc : node->get_output_descriptions(body_index)) { + auto result = results[output_desc->m_body_value_index]; + const auto& result_inputs = result->input_values(); + auto output_node = node->output(output_desc->m_output_index).get_node(); + propagate_path(result_inputs, output_node); + } + }; + auto node = m.get_match_root(); - const auto& inputs = node->input_values(); - if (any_of(inputs.begin(), inputs.end(), [](const Output& output) { - return ov::has_nms_selected_indices(output.get_node()); - })) { - ov::set_nms_selected_indices(node.get()); + if (ov::is_type(node)) { + auto multi_subgraph_op = ov::as_type_ptr(node); + const auto& models = multi_subgraph_op->get_functions(); + + for (size_t body_idx = 0; body_idx < models.size(); ++body_idx) { + 
handle_params(multi_subgraph_op, models[body_idx], static_cast(body_idx)); + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(models[body_idx]); + handle_results(multi_subgraph_op, models[body_idx], static_cast(body_idx)); + } + } else { + const auto& inputs = node->input_values(); + propagate_path(inputs, node.get()); } return false; }; diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index d5c8204663c242..b29ab74981a483 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -607,7 +607,11 @@ bool fuse_type_to_parameter(const std::shared_ptr& node, auto convert = std::make_shared(param, to); for (auto& input : param_consumers) { const auto consumer = input.get_node(); - if (ov::is_type(consumer) || ov::is_type(consumer)) { + if (ov::is_type(consumer) || ov::is_type(consumer) || + // TODO: refactor after ngraph op defined + // The fourth and fifth inputs are kvcache and should be directly connected to parameters + (consumer->get_type_name() == std::string("PagedAttentionExtension") && + (input.get_index() == 3 || input.get_index() == 4))) { continue; } input.replace_source_output(convert); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp b/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp index e04a25e2373b2e..c416ca6da27e66 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp @@ -46,17 +46,11 @@ ov::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool forc if (nms->output(1).get_element_type() != output_1.get_element_type()) { output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_1.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(1))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_1.get_node_shared_ptr()); } if (nms->output(2).get_element_type() != output_2.get_element_type()) { output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(2))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_2.get_node_shared_ptr()); } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp b/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp index 730788b2c4c8f4..8938333d69be73 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp @@ -60,17 +60,11 @@ pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNms if (nms->output(1).get_element_type() != output_1.get_element_type()) { output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - 
output_1.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(1))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_1.get_node_shared_ptr()); } if (nms->output(2).get_element_type() != output_2.get_element_type()) { output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(2))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_2.get_node_shared_ptr()); } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp index 39833c645f7fff..1da24df80ee2a7 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp @@ -87,18 +87,12 @@ ov::pass::ConvertNMSRotatedToNMSIEInternal::ConvertNMSRotatedToNMSIEInternal() { Output output_0 = nms_legacy->output(0); if (nms_rotated->output(0).get_element_type() != output_0.get_element_type()) { output_0 = std::make_shared(output_0, nms_rotated->output(0).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_0.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(0))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_0.get_node_shared_ptr()); } Output output_2 = nms_legacy->output(2); if (nms_rotated->output(2).get_element_type() != output_2.get_element_type()) { output_2 = std::make_shared(output_2, nms_rotated->output(2).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(2))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_2.get_node_shared_ptr()); } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp index be45d9940272ad..ba41f7812fe593 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp @@ -108,18 +108,12 @@ ov::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() { Output output_0 = nms_legacy->output(0); if (nms_5->output(0).get_element_type() != output_0.get_element_type()) { output_0 = std::make_shared(output_0, nms_5->output(0).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_0.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_5->output(0))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_0.get_node_shared_ptr()); } Output output_2 = nms_legacy->output(2); if (nms_5->output(2).get_element_type() != output_2.get_element_type()) { output_2 = std::make_shared(output_2, nms_5->output(2).get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_START - output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_5->output(2))); - OPENVINO_SUPPRESS_DEPRECATED_END new_ops.emplace_back(output_2.get_node_shared_ptr()); } diff --git a/src/common/transformations/tests/common_optimizations/convert_nms_gather_path_to_unsigned_test.cpp 
b/src/common/transformations/tests/common_optimizations/convert_nms_gather_path_to_unsigned_test.cpp index e9763e4d6bfec1..3076e32646eaa1 100644 --- a/src/common/transformations/tests/common_optimizations/convert_nms_gather_path_to_unsigned_test.cpp +++ b/src/common/transformations/tests/common_optimizations/convert_nms_gather_path_to_unsigned_test.cpp @@ -205,3 +205,68 @@ TEST(TransformationTests, test_convert_to_unsigned_nms_gather_3) { ASSERT_NO_THROW(check_rt_info(f)); ASSERT_EQ(count_ops_of_type(f), 0); } + +TEST(TransformationTests, test_convert_to_unsigned_nms_gather_with_if_condition) { + auto boxes = make_shared(element::f32, PartialShape{1, -1, 4}); + auto scores = make_shared(element::f32, PartialShape{1, 1, -1}); + auto nms = make_shared(boxes, scores); + + auto gather = make_shared(nms->output(0), + opset8::Constant::create(element::i32, Shape{1}, {2}), + opset8::Constant::create(element::i32, Shape{1}, {0})); + + auto shape_of = make_shared(gather); + auto gather_shape = make_shared(shape_of, + opset8::Constant::create(element::i32, Shape{1}, {0}), + opset8::Constant::create(element::i32, Shape{1}, {0})); + auto equal = make_shared(gather_shape, opset8::Constant::create(element::i64, Shape{1}, {1})); + auto if_op = make_shared(equal); + + auto input_then = make_shared(element::i32, PartialShape{-1, 1}); + + auto start = opset8::Constant::create(element::i32, Shape{1}, {3}); + auto stop = opset8::Constant::create(element::i32, Shape{1}, {4}); + auto step = opset8::Constant::create(element::i32, Shape{1}, {1}); + auto slice = make_shared(input_then, start, stop, step); + + auto then_op_result = make_shared(slice); + auto body_then_function = make_shared(NodeVector{then_op_result}, ParameterVector{input_then}); + + auto input_else = make_shared(element::i32, PartialShape{-1, 1}); + auto reshape = + make_shared(input_else, opset8::Constant::create(element::i32, Shape{1}, {-1}), true); + auto else_op_result = make_shared(reshape); + auto body_else_function = make_shared(NodeVector{else_op_result}, ParameterVector{input_else}); + + if_op->set_then_body(body_then_function); + if_op->set_else_body(body_else_function); + if_op->set_input(gather, input_then, input_else); + + auto result_if = if_op->set_output(then_op_result, else_op_result); + + auto begin = opset8::Constant::create(element::i32, Shape{1}, {3}); + auto end = opset8::Constant::create(element::i32, Shape{1}, {4}); + auto strides = opset8::Constant::create(element::i32, Shape{1}, {1}); + auto ss_node = + make_shared(result_if, begin, end, strides, vector{1, 0}, vector{1, 0}); + + auto data = make_shared(element::f32, PartialShape{-1}); + auto axis = opset8::Constant::create(element::i32, Shape{1}, {0}); + auto target_gather = make_shared(data, ss_node, axis); + + shared_ptr f = make_shared(NodeVector{target_gather}, ParameterVector{boxes, scores, data}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + + const auto& ops = f->get_ops(); + const auto& gather_it = find(ops.begin(), ops.end(), target_gather); + ASSERT_NE(gather_it, ops.end()); + + const auto& rti = (*gather_it)->get_rt_info(); + const auto& reverse = rti.find("dontReverseIndices"); + ASSERT_NE(reverse, rti.end()); +} diff --git a/src/common/transformations/tests/op_conversions/convert_nms_to_nms_ie_internal_test.cpp b/src/common/transformations/tests/op_conversions/convert_nms_to_nms_ie_internal_test.cpp index 81d66bc18f464d..597817ae543a03 100644 --- 
a/src/common/transformations/tests/op_conversions/convert_nms_to_nms_ie_internal_test.cpp +++ b/src/common/transformations/tests/op_conversions/convert_nms_to_nms_ie_internal_test.cpp @@ -26,52 +26,6 @@ using namespace testing; using namespace ov; -TEST_F(TransformationTestsF, ConvertNMS1ToNMSIEInternal) { - { - auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); - auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); - auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10}); - auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75}); - auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7}); - auto nms = std::make_shared(boxes, - scores, - max_output_boxes_per_class, - iou_threshold, - score_threshold, - op::v1::NonMaxSuppression::BoxEncodingType::CORNER, - true); - - model = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); - - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - // as inside test infrastructure we can not predict output names for given Model - // we have to enable soft names comparison manually - enable_soft_names_comparison(); - } - - { - auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); - auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); - auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10}); - auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75}); - auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7}); - auto nms = std::make_shared(boxes, - scores, - max_output_boxes_per_class, - iou_threshold, - score_threshold, - 0, - true, - element::i32); - auto convert = std::make_shared(nms->output(0), element::i64); - - model_ref = std::make_shared(NodeVector{convert}, ParameterVector{boxes, scores}); - } -} - TEST_F(TransformationTestsF, ConvertNMS3ToNMSIEInternal) { { auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); diff --git a/src/core/src/op/divide.cpp b/src/core/src/op/divide.cpp index b00b731b296351..d903c00c681dce 100644 --- a/src/core/src/op/divide.cpp +++ b/src/core/src/op/divide.cpp @@ -246,7 +246,7 @@ bool Divide::evaluate(TensorVector& outputs, const TensorVector& inputs) const { this, outputs, inputs, - OV_PP_ET_LIST(f32, i32, i64, u32, u64), + OV_PP_ET_LIST(f32, i8, i32, i64, u8, u32, u64), divide::Evaluate, inputs[0].get_element_type(), inputs[0], diff --git a/src/core/src/op/multiply.cpp b/src/core/src/op/multiply.cpp index fa3ef518c03202..88dbd347d46edf 100644 --- a/src/core/src/op/multiply.cpp +++ b/src/core/src/op/multiply.cpp @@ -51,7 +51,7 @@ bool Multiply::evaluate(TensorVector& outputs, const TensorVector& inputs) const this, outputs, inputs, - OV_PP_ET_LIST(f32, f64, i32, i64, u32, u64), + OV_PP_ET_LIST(f32, f64, i8, i32, i64, u8, u32, u64), multiply::Evaluate, inputs[0].get_element_type(), inputs[0], diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt index e53ee551f07063..681fa778e2be12 100644 --- a/src/frontends/onnx/frontend/CMakeLists.txt +++ b/src/frontends/onnx/frontend/CMakeLists.txt @@ -10,7 +10,7 @@ ov_add_frontend(NAME onnx FILEDESCRIPTION "FrontEnd to load and convert ONNX file format" LINK_LIBRARIES openvino_onnx_common openvino::core::dev) -set(ONNX_OPSET_VERSION 18 CACHE INTERNAL "Supported version of ONNX operator set") +set(ONNX_OPSET_VERSION 20 CACHE INTERNAL "Supported version of ONNX operator set") 
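+# The opset ceiling should track the newest registered translators; opset 20
+# covers the ReduceMax opset-13/18/20 handlers registered in ops_bridge.cpp.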
target_compile_definitions(${TARGET_NAME} PRIVATE ONNX_OPSET_VERSION=${ONNX_OPSET_VERSION}) ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME} diff --git a/src/frontends/onnx/frontend/src/op/reduce.cpp b/src/frontends/onnx/frontend/src/op/reduce.cpp index 9a8c1cd4ec3ea2..d9108c8b6fcf30 100644 --- a/src/frontends/onnx/frontend/src/op/reduce.cpp +++ b/src/frontends/onnx/frontend/src/op/reduce.cpp @@ -8,6 +8,7 @@ #include "identity.hpp" #include "openvino/frontend/exception.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/exp.hpp" #include "openvino/op/log.hpp" #include "openvino/op/multiply.hpp" @@ -94,6 +95,27 @@ const std::set supported_types_v1 = {element::u32, element::u64, element::i32, element::i64, element::f16, element::f32, element::f64}; const std::set supported_types_v2 = {element::u32, element::u64, element::i32, element::i64, element::f16, element::f32, element::f64, element::bf16}; +const std::set supported_types_v3 = {element::u32, + element::u64, + element::i32, + element::i64, + element::f16, + element::f32, + element::f64, + element::bf16, + element::i8, + element::u8}; +const std::set supported_types_v4 = {element::u32, + element::u64, + element::i32, + element::i64, + element::f16, + element::f32, + element::f64, + element::bf16, + element::i8, + element::u8, + element::boolean}; template std::shared_ptr make_ov_reduction_op(const Node& node, @@ -177,11 +199,33 @@ namespace set_13 { ov::OutputVector reduce_sum(const ov::frontend::onnx::Node& node) { return {make_ov_reduction_op(node, node.get_ov_inputs().at(0), supported_types_v2, false)}; } +ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node) { + return {make_ov_reduction_op(node, node.get_ov_inputs().at(0), supported_types_v3)}; +} } // namespace set_13 namespace set_18 { -// Placeholder +ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node) { + return {make_ov_reduction_op(node, node.get_ov_inputs().at(0), supported_types_v3, false)}; +} } // namespace set_18 + +namespace set_20 { +ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node) { + auto data = node.get_ov_inputs().at(0); + if (data.get_element_type() != element::boolean) { + return {make_ov_reduction_op(node, data, supported_types_v3, false)}; + } else { + // Handling boolean as a uint8 + return {std::make_shared( + make_ov_reduction_op(node, + std::make_shared(data, element::u8), + supported_types_v4, + false), + element::boolean)}; + } +} +} // namespace set_20 } // namespace op } // namespace onnx } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/op/reduce.hpp b/src/frontends/onnx/frontend/src/op/reduce.hpp index defa7c4e33dce2..3acdc3677e77da 100644 --- a/src/frontends/onnx/frontend/src/op/reduce.hpp +++ b/src/frontends/onnx/frontend/src/op/reduce.hpp @@ -29,6 +29,15 @@ ov::OutputVector reduce_l2(const ov::frontend::onnx::Node& node); namespace set_1 { ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node); } // namespace set_1 +namespace set_13 { +ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node); +} // namespace set_13 +namespace set_18 { +ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node); +} // namespace set_18 +namespace set_20 { +ov::OutputVector reduce_max(const ov::frontend::onnx::Node& node); +} // namespace set_20 namespace set_1 { ov::OutputVector reduce_mean(const ov::frontend::onnx::Node& node); diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp 
index 2e4d60d87e60ab..6350c902ba5761 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -484,6 +484,9 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("ReduceL1", 1, reduce_l1); REGISTER_OPERATOR("ReduceL2", 1, reduce_l2); REGISTER_OPERATOR("ReduceMax", 1, reduce_max); + REGISTER_OPERATOR("ReduceMax", 13, reduce_max); + REGISTER_OPERATOR("ReduceMax", 18, reduce_max); + REGISTER_OPERATOR("ReduceMax", 20, reduce_max); REGISTER_OPERATOR("ReduceMean", 1, reduce_mean); REGISTER_OPERATOR("ReduceMin", 1, reduce_min); REGISTER_OPERATOR("ReduceProd", 1, reduce_prod); diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index 1a66a0624cf8e0..a7d8744262f106 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -59,7 +59,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_99954 = xfail_test(reason="Constant Pad - RuntimeError: Shape inference of Reference node with name y failed") xfail_issue_99955 = xfail_test(reason="GroupNorm is not supported") xfail_issue_99957 = xfail_test(reason="LayerNorm - RuntimeError: While validating node ''") -xfail_issue_99958 = xfail_test(reason="LogSoftmax - Results mismatch") xfail_issue_99960 = xfail_test(reason="MVN - Results mismatch") xfail_issue_99961 = xfail_test(reason="Optional has/get element operators are not supported)'") xfail_issue_99962 = pytest.mark.skip(reason="ReduceL1/L2 - Unrecognized attribute: axes for operator ReduceL1/L2") @@ -71,7 +70,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_99970 = xfail_test(reason="Scatter and ScatterND - RuntimeError: Check '(reduction == none)' failed at " "src/frontends/onnx/frontend/src/op/scatter_elements.cpp OR at " "src/frontends/onnx/frontend/src/op/scatter_nd") -xfail_issue_99972 = xfail_test(reason="Softmax - Results mismatch") xfail_issue_99973 = xfail_test(reason="Split - RuntimeError: While validating ONNX node " "''") xfail_issue_38710 = xfail_test(reason="RuntimeError: data has zero dimension which is not allowed") diff --git a/src/frontends/onnx/tests/models/reduce_max_18.prototxt b/src/frontends/onnx/tests/models/reduce_max_18.prototxt new file mode 100644 index 00000000000000..f08aae650c54c6 --- /dev/null +++ b/src/frontends/onnx/tests/models/reduce_max_18.prototxt @@ -0,0 +1,68 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "A" + input: "axes" + output: "B" + op_type: "ReduceMax" + } + name: "compute_graph" + initializer { + data_type: 6 + dims: 1 + name: "axes" + raw_data: "\002\000\000\000" + } + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "axes" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "B" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } +} +opset_import { + version: 18 +} diff --git a/src/frontends/onnx/tests/models/reduce_wrong_type_v3.prototxt b/src/frontends/onnx/tests/models/reduce_wrong_type_v3.prototxt new file mode 100644 index 00000000000000..8e9110bd532ad9 --- /dev/null +++ b/src/frontends/onnx/tests/models/reduce_wrong_type_v3.prototxt @@ -0,0 +1,48 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "A" + 
output: "B" + op_type: "ReduceMax" + } + name: "compute_graph" + input { + name: "A" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "B" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 1 + } + } + } + } + } +} +opset_import { + version: 13 +} diff --git a/src/frontends/onnx/tests/models/reduce_wrong_type_v4.prototxt b/src/frontends/onnx/tests/models/reduce_wrong_type_v4.prototxt new file mode 100644 index 00000000000000..a36fafc7ab66f0 --- /dev/null +++ b/src/frontends/onnx/tests/models/reduce_wrong_type_v4.prototxt @@ -0,0 +1,48 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "A" + output: "B" + op_type: "ReduceMax" + } + name: "compute_graph" + input { + name: "A" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "B" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 1 + } + } + } + } + } +} +opset_import { + version: 20 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index b02d3a1116131c..b515fd1cb78799 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -974,6 +974,28 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_max) { test_case.run(); } +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_max_18) { + // TEMPLATE plugin has an issue with evaluation for u8 type + if (std::string("${BACKEND_NAME}") == std::string("INTERPRETER")) { + GTEST_SKIP(); + } + + auto model = convert_model("reduce_max_18.onnx"); + + // input data shape (1, 1, 4, 4) + std::vector> inputs{ + ov::test::NDArray({{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}}}) + .get_vector()}; + + // output data shape (1,) + auto expected_output = ov::test::NDArray({13, 14, 15, 16}).get_vector(); + + auto test_case = ov::test::TestCase(model, s_device); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); +} + OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_max_invalid_axes) { EXPECT_THROW(convert_model("reduce_max_invalid_axes.onnx"), ov::Exception); } diff --git a/src/frontends/onnx/tests/onnx_import_exceptions.cpp b/src/frontends/onnx/tests/onnx_import_exceptions.cpp index 586f5bbc3d4d2f..28c0d9aaec1ca2 100644 --- a/src/frontends/onnx/tests/onnx_import_exceptions.cpp +++ b/src/frontends/onnx/tests/onnx_import_exceptions.cpp @@ -92,3 +92,24 @@ TEST(onnx_importer, exception_msg_onnx_reduce_wrong_type_v2) { FAIL() << "The ONNX model importer failed for unexpected reason"; } } + +TEST(onnx_importer, exception_msg_onnx_reduce_wrong_type_v3) { + try { + convert_model("reduce_wrong_type_v3.onnx"); + // Should have thrown, so fail if it didn't + FAIL() << "ONNX Importer did not detected incorrect model!"; + } catch (const ::ov::Exception& e) { + EXPECT_HAS_SUBSTRING(e.what(), std::string("Unsupported input type boolean")); + } + // On MacOS after we re-throw ov::Exception exception, we couldn't catch it as is, + // thus below workaround. + catch (const std::exception& e) { + EXPECT_HAS_SUBSTRING(e.what(), std::string("Unsupported input type boolean")); + } catch (...) 
{ + FAIL() << "The ONNX model importer failed for unexpected reason"; + } +} + +TEST(onnx_importer, no_exception_onnx_reduce_wrong_type_v4) { + EXPECT_NO_THROW(convert_model("reduce_wrong_type_v4.onnx")); +} diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index 2307b5282c235c..229b88b39b31a8 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -51,13 +51,11 @@ xfail_issue_99954, xfail_issue_99955, xfail_issue_99957, - xfail_issue_99958, xfail_issue_99960, xfail_issue_99961, xfail_issue_99968, xfail_issue_99969, xfail_issue_99970, - xfail_issue_99972, xfail_issue_99973, xfail_issue_101965, xfail_issue_113506, @@ -453,10 +451,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_layer_normalization_4d_axis_negative_3_expanded_ver18_cpu", "OnnxBackendNodeModelTest.test_layer_normalization_default_axis_expanded_ver18_cpu", ), - ( - xfail_issue_99958, - "OnnxBackendNodeModelTest.test_logsoftmax_large_number_expanded_ver18_cpu", - ), ( xfail_issue_99960, "OnnxBackendNodeModelTest.test_mvn_expanded_ver18_cpu", @@ -499,12 +493,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_reduce_log_sum_exp_negative_axes_keepdims_example_cpu", "OnnxBackendNodeModelTest.test_reduce_log_sum_exp_keepdims_random_cpu", "OnnxBackendNodeModelTest.test_reduce_log_sum_negative_axes_cpu", - "OnnxBackendNodeModelTest.test_reduce_max_do_not_keepdims_example_cpu", - "OnnxBackendNodeModelTest.test_reduce_max_do_not_keepdims_random_cpu", - "OnnxBackendNodeModelTest.test_reduce_max_keepdims_example_cpu", - "OnnxBackendNodeModelTest.test_reduce_max_keepdims_random_cpu", - "OnnxBackendNodeModelTest.test_reduce_max_negative_axes_keepdims_example_cpu", - "OnnxBackendNodeModelTest.test_reduce_max_negative_axes_keepdims_random_cpu", "OnnxBackendNodeModelTest.test_reduce_mean_do_not_keepdims_example_cpu", "OnnxBackendNodeModelTest.test_reduce_log_sum_exp_negative_axes_keepdims_random_cpu", "OnnxBackendNodeModelTest.test_reduce_mean_do_not_keepdims_random_cpu", @@ -552,10 +540,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_scatternd_max_cpu", "OnnxBackendNodeModelTest.test_scatternd_min_cpu", ), - ( - xfail_issue_99972, - "OnnxBackendNodeModelTest.test_softmax_large_number_expanded_ver18_cpu", - ), ( xfail_issue_99973, "OnnxBackendNodeModelTest.test_split_1d_uneven_split_opset18_cpu", @@ -732,7 +716,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ( xfail_issue_125495, - "OnnxBackendNodeModelTest.test_reduce_max_bool_inputs_cpu", "OnnxBackendNodeModelTest.test_reduce_min_bool_inputs_cpu", ), ( diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 8f8d54147a3a05..0c2ab370337750 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -290,7 +290,7 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector& va } std::map FrontEnd::get_supported_ops(const ov::frontend::InputModel::Ptr& model) const { - std::map supported_ops = get_supported_ops_fx(); + std::map supported_ops; if (std::dynamic_pointer_cast(model)->decoder_type_name() == "fx") supported_ops = get_supported_ops_fx(); else diff --git a/src/frontends/pytorch/src/input_model.cpp b/src/frontends/pytorch/src/input_model.cpp index 1c06003855c39e..bd7927228b9980 100644 
--- a/src/frontends/pytorch/src/input_model.cpp +++ b/src/frontends/pytorch/src/input_model.cpp @@ -24,7 +24,7 @@ InputModel::InputModel(const std::shared_ptr& model_decoder) : m_m const auto& outputs = m_model_decoder->outputs(); for (size_t i = 0; i < outputs.size(); ++i) { auto out_place = std::make_shared(*this, outputs[i]); - m_name_to_place.emplace(std::to_string(inputs[i]), std::dynamic_pointer_cast(out_place)); + m_name_to_place.emplace(std::to_string(outputs[i]), std::dynamic_pointer_cast(out_place)); for (const auto& name : out_place->get_names()) { m_name_to_place.emplace(name, std::dynamic_pointer_cast(out_place)); } diff --git a/src/frontends/pytorch/src/op/any.cpp b/src/frontends/pytorch/src/op/any.cpp new file mode 100644 index 00000000000000..b2e24ec818c1b2 --- /dev/null +++ b/src/frontends/pytorch/src/op/any.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/not_equal.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_logical_or.hpp" +#include "openvino/op/reshape.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +OutputVector translate_any_fx(const NodeContext& context) { + num_inputs_check(context, 1, 3); + auto x = context.get_input(0); + + Output dims; + if (!context.input_is_none(1)) { + dims = context.get_input(1); + } else { + dims = get_axes_range(context, 0); + } + bool keep_dims = false; + if (!context.input_is_none(2)) + keep_dims = context.const_input(2); + auto any = context.mark_node(std::make_shared(x, dims, keep_dims)); + return {any}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/argmax_argmin.cpp b/src/frontends/pytorch/src/op/argmax_argmin.cpp index e052479d5c2202..edb7a938c30b52 100644 --- a/src/frontends/pytorch/src/op/argmax_argmin.cpp +++ b/src/frontends/pytorch/src/op/argmax_argmin.cpp @@ -31,7 +31,8 @@ OutputVector create_argmax_argmin_op(const NodeContext& context, TopKMode mode) } if (!context.input_is_none(1)) { auto axis = context.const_input(1); - auto topk = context.mark_node(std::make_shared(input, k, axis, mode, TopKSortType::NONE)); + auto topk = context.mark_node( + std::make_shared(input, k, axis, mode, TopKSortType::SORT_VALUES, element::i32, true)); indices = context.mark_node(std::make_shared(topk->output(1), element::i64)); if (!keep_dims) { auto axis_to_remove = context.mark_node(v0::Constant::create(element::i32, Shape{}, {axis})); @@ -41,7 +42,8 @@ OutputVector create_argmax_argmin_op(const NodeContext& context, TopKMode mode) int64_t axis = 0; auto minus_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1})); auto flatten_input = context.mark_node(std::make_shared(input, minus_one, false)); - auto topk = context.mark_node(std::make_shared(flatten_input, k, axis, mode, TopKSortType::NONE)); + auto topk = context.mark_node( + std::make_shared(flatten_input, k, axis, mode, TopKSortType::SORT_VALUES, element::i32, true)); indices = context.mark_node(std::make_shared(topk->output(1), element::i64)); if (keep_dims) { auto input_shape = context.mark_node(std::make_shared(input, element::i32)); diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp index 7dfb7ccd796ad7..7a926a38836c0f 100644 --- a/src/frontends/pytorch/src/op/cat.cpp +++ 
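// --- Editor's aside (illustration only; not part of the patch) ---------------
// The argmax/argmin change above swaps TopKSortType::NONE for SORT_VALUES with
// stable=true because ties matter: aten.argmax is expected to return the index
// of the first maximal element, and an unsorted, unstable TopK with k=1 may
// return any of them. Scalar model of the required semantics:
#include <cstddef>
#include <cstdint>

int64_t argmax_first(const float* x, size_t n) {
    size_t best = 0;
    for (size_t i = 1; i < n; ++i)
        if (x[i] > x[best])  // strict '>' keeps the earliest index on ties
            best = i;
    return static_cast<int64_t>(best);
}
// ------------------------------------------------------------------------------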
b/src/frontends/pytorch/src/op/cat.cpp @@ -102,20 +102,24 @@ OutputVector translate_quantized_cat(const NodeContext& context) { }; OutputVector translate_stack_fx(const NodeContext& context) { - num_inputs_check(context, 2, context.get_input_size()); + num_inputs_check(context, 1, context.get_input_size()); auto dim = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); std::deque> list_elems; auto num_elements = context.get_input_size(); - if (num_elements > 2) - num_elements = num_elements - 1; - for (size_t i = 0; i < num_elements; i++) { + for (size_t i = 0; i < num_elements - 1; i++) { auto stack_input = context.mark_node(std::make_shared(context.get_input(static_cast(i)), dim)); list_elems.push_back(stack_input); } int64_t axis = 0; - if (context.get_input_size() > 2) - axis = context.const_input(context.get_input_size() - 1); + if (!context.get_input_type(num_elements - 1).is()) { + // axis can be not present and that means that last input will have List type + axis = context.const_input(num_elements - 1); + } else { + auto stack_input = context.mark_node( + std::make_shared(context.get_input(static_cast(num_elements - 1)), dim)); + list_elems.push_back(stack_input); + } return translate_cat_common(context, list_elems, axis, true); } diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp index f8640f2693f90d..7c091f2c2cb8da 100644 --- a/src/frontends/pytorch/src/op/div.cpp +++ b/src/frontends/pytorch/src/op/div.cpp @@ -90,6 +90,17 @@ OutputVector translate_div_fx(const NodeContext& context) { return translate_div_common(context, x, y, rounding_mode, false); }; +OutputVector translate_div_fx_(const NodeContext& context) { + num_inputs_check(context, 2, 2); + auto x = context.get_input(0); + auto y = context.get_input(1); + std::string rounding_mode = ""; + if (context.has_attribute("rounding_mode")) { + rounding_mode = context.get_attribute("rounding_mode"); + } + return translate_div_common(context, x, y, rounding_mode, true); +}; + } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op/embedding_bag.cpp b/src/frontends/pytorch/src/op/embedding_bag.cpp index a1094265eaf789..633eac2a100ca1 100644 --- a/src/frontends/pytorch/src/op/embedding_bag.cpp +++ b/src/frontends/pytorch/src/op/embedding_bag.cpp @@ -15,10 +15,9 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_embedding_bag(const NodeContext& context) { +OutputVector translate_embedding_bag_common(const NodeContext& context) { // aten::embedding_bag(weight, input, offsets=None, scale_grad_by_freq=False, mode_enum=1, sparse=False, // per_sample_weights=None, include_last_offset=False, padding_idx=None) - num_inputs_check(context, 9, 9); // we have only EmbeddingBagSum case support, check it before translation auto mode = context.const_input(4); PYTORCH_OP_CONVERSION_CHECK(mode == 0, "Only sum mode supported for aten::embedding_bag translation"); @@ -43,7 +42,9 @@ OutputVector translate_embedding_bag(const NodeContext& context) { // with offsets case auto offsets = context.get_input(2); offsets = context.mark_node(std::make_shared(offsets, element::i32)); - auto include_last_offset = context.const_input(7); + bool include_last_offset = false; + if (!context.input_is_none(7)) + include_last_offset = context.const_input(7); PYTORCH_OP_CONVERSION_CHECK(!include_last_offset, "Inclusion last offset is not supported"); // no per_sample_wights if (context.input_is_none(6)) { @@ -63,7 +64,18 @@ 
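// --- Editor's aside (illustration only; not part of the patch) ---------------
// translate_stack_fx above implements torch.stack: every tensor input is
// unsqueezed along the stack axis and the results are concatenated, so
// stacking three [2, 3] tensors at axis 0 yields [3, 2, 3]. The last FX input
// is the axis only when it is not List-typed; otherwise it is one more tensor
// to stack. Shape model of the stack itself:
#include <cstddef>
#include <vector>

std::vector<size_t> stack_shape(std::vector<size_t> elem_shape,
                                size_t num_elems,
                                size_t axis) {
    // torch.stack inserts a new dimension of size num_elems at `axis`
    elem_shape.insert(elem_shape.begin() + static_cast<std::ptrdiff_t>(axis), num_elems);
    return elem_shape;
}
// ------------------------------------------------------------------------------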
OutputVector translate_embedding_bag(const NodeContext& context) { return {result, zero, zero, zero}; }; +OutputVector translate_embedding_bag(const NodeContext& context) { + num_inputs_check(context, 9, 9); + return translate_embedding_bag_common(context); +} + +OutputVector translate_embedding_bag_fx(const NodeContext& context) { + num_inputs_check(context, 7, 9); + ov::OutputVector output = translate_embedding_bag_common(context); + return {context.mark_node(make_list_construct(output))}; +} + } // namespace op } // namespace pytorch } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/pytorch/src/op/log.cpp b/src/frontends/pytorch/src/op/log.cpp index 3ac3554a286efd..80a526a73e0a65 100644 --- a/src/frontends/pytorch/src/op/log.cpp +++ b/src/frontends/pytorch/src/op/log.cpp @@ -22,7 +22,7 @@ namespace op { using namespace ov::op; -OutputVector translate_log_sigmoid(const NodeContext& context) { +std::shared_ptr translate_log_sigmoid_common(const NodeContext& context) { num_inputs_check(context, 1, 1); auto op_vector = op::translate_1to1_match_1_inputs_with_fp32_type_alignment(context); PYTORCH_OP_CONVERSION_CHECK(op_vector.size() == 1, @@ -30,7 +30,16 @@ OutputVector translate_log_sigmoid(const NodeContext& context) { op_vector.size()); auto sigmoid = op_vector[0]; auto log = context.mark_node(std::make_shared(sigmoid)); - return {log}; + return log; +}; + +OutputVector translate_log_sigmoid(const NodeContext& context) { + return {translate_log_sigmoid_common(context)}; +}; + +OutputVector translate_log_sigmoid_fx(const NodeContext& context) { + auto log = translate_log_sigmoid_common(context); + return {context.mark_node(make_list_construct(log->outputs()))}; }; OutputVector translate_log2(const NodeContext& context) { diff --git a/src/frontends/pytorch/src/op/sort.cpp b/src/frontends/pytorch/src/op/sort.cpp index 02cd6214af1eda..b2f48cf002e925 100644 --- a/src/frontends/pytorch/src/op/sort.cpp +++ b/src/frontends/pytorch/src/op/sort.cpp @@ -9,22 +9,8 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_sort(const NodeContext& context) { - num_inputs_check(context, 3, 4); +OutputVector translate_sort_common(const NodeContext& context, bool stable, int64_t dim, bool descending) { const auto input_tensor = context.get_input(0); - bool stable, descending; - int64_t dim; - - if (context.get_input_size() == 4) { - stable = context.const_input(1); - dim = context.const_input(2); - descending = context.const_input(3); - } else { - stable = false; - dim = context.const_input(1); - descending = context.const_input(2); - } - auto mode = descending ? 
ov::op::TopKMode::MAX : ov::op::TopKMode::MIN; auto zero_axis = context.mark_node(opset11::Constant::create(element::i32, Shape{1}, {0})); auto dim_axis = context.mark_node(opset11::Constant::create(element::i64, Shape{1}, {dim})); @@ -39,6 +25,42 @@ OutputVector translate_sort(const NodeContext& context) { element::i64, stable)); return topk->outputs(); +} + +OutputVector translate_sort(const NodeContext& context) { + num_inputs_check(context, 3, 4); + bool stable, descending; + int64_t dim; + + if (context.get_input_size() == 4) { + stable = context.const_input(1); + dim = context.const_input(2); + descending = context.const_input(3); + } else { + stable = false; + dim = context.const_input(1); + descending = context.const_input(2); + } + + return translate_sort_common(context, stable, dim, descending); +}; + +OutputVector translate_sort_fx(const NodeContext& context) { + // aten.sort.default(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) + num_inputs_check(context, 1, 3); + bool descending = false; + bool stable = false; + int64_t dim = -1; + + if (!context.input_is_none(1)) { + dim = context.const_input(1); + } + if (!context.input_is_none(2)) { + descending = context.const_input(2); + } + + auto topk_outputs = translate_sort_common(context, stable, dim, descending); + return {context.mark_node(make_list_construct(OutputVector({topk_outputs[0], topk_outputs[1]})))}; }; OutputVector translate_argsort(const NodeContext& context) { diff --git a/src/frontends/pytorch/src/op/split.cpp b/src/frontends/pytorch/src/op/split.cpp index b58c05c1f5bb47..01e14c3b57c2fc 100644 --- a/src/frontends/pytorch/src/op/split.cpp +++ b/src/frontends/pytorch/src/op/split.cpp @@ -25,11 +25,11 @@ OutputVector translate_chunk_fx(const NodeContext& context) { std::shared_ptr chunk; auto dim_val = context.const_input(2); - auto shape = context.get_input(0).get_shape(); + auto shape = context.get_input(0).get_partial_shape(); if (dim_val < 0) { - dim_val = static_cast(shape.size()) + dim_val; + dim_val = static_cast(shape.rank().get_length()) + dim_val; } - int num_splits = static_cast(shape[dim_val]) / num_chunks; + int num_splits = static_cast(shape[dim_val].get_length()) / num_chunks; chunk = context.mark_node(std::make_shared(context.get_input(0), dim, num_splits)); @@ -37,12 +37,17 @@ OutputVector translate_chunk_fx(const NodeContext& context) { } OutputVector translate_unbind_int_fx(const NodeContext& context) { - num_inputs_check(context, 2, 3); + num_inputs_check(context, 1, 3); auto input = context.get_input(0); - auto dim = context.get_input(1); - auto dim_val = context.const_input(1); + Output dim; + int64_t dim_val = 0; + if (context.input_is_none(1)) { + dim = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + } else { + dim = context.get_input(1); + dim_val = context.const_input(1); + } auto shape = input.get_shape(); - if (dim_val < 0) { dim_val = static_cast(shape.size()) + dim_val; } diff --git a/src/frontends/pytorch/src/op/topk.cpp b/src/frontends/pytorch/src/op/topk.cpp index 4a1943a2ae4dae..2fd79f3c3f92a4 100644 --- a/src/frontends/pytorch/src/op/topk.cpp +++ b/src/frontends/pytorch/src/op/topk.cpp @@ -41,6 +41,39 @@ OutputVector translate_topk(const NodeContext& context) { return {topk->output(0), indices}; }; +OutputVector translate_topk_fx(const NodeContext& context) { + // aten.topk.default(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> Tuple[Tensor, Tensor] + num_inputs_check(context, 2, 5); + const auto 
input_tensor = context.get_input(0); + auto k = context.get_input(1); + int64_t axis{-1}; + bool largest = true; + bool sorted = true; + auto mode = TopKMode::MIN; + auto sort = TopKSortType::NONE; + + if (!context.input_is_none(2)) { + axis = context.const_input(2); + } + if (!context.input_is_none(3)) { + largest = context.const_input(3); + } + if (!context.input_is_none(4)) { + sorted = context.const_input(4); + } + if (largest) { + mode = TopKMode::MAX; + } + if (sorted) { + sort = TopKSortType::SORT_VALUES; + } + + auto topk = context.mark_node(std::make_shared(input_tensor, k, axis, mode, sort)); + auto indices = context.mark_node(std::make_shared(topk->output(1), element::i64)); + + return {context.mark_node(make_list_construct(OutputVector({topk->output(0), indices})))}; +}; + } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 26dc9ef018100a..55d218df430e43 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -245,6 +245,7 @@ OP_CONVERTER(translate_adaptive_max_pool2d_fx); OP_CONVERTER(translate_adaptive_max_pool3d_fx); OP_CONVERTER(translate_addcmul_fx); OP_CONVERTER(translate_addmm_fx); +OP_CONVERTER(translate_any_fx); OP_CONVERTER(translate_arange_fx); OP_CONVERTER(translate_batch_norm_legit_fx); OP_CONVERTER(translate_batch_norm_legit_no_training_fx); @@ -254,6 +255,8 @@ OP_CONVERTER(translate_constant_pad_nd_fx); OP_CONVERTER(translate_cumsum_fx); OP_CONVERTER(translate_chunk_fx); OP_CONVERTER(translate_div_fx); +OP_CONVERTER(translate_div_fx_); +OP_CONVERTER(translate_embedding_bag_fx); OP_CONVERTER(translate_expand_fx); OP_CONVERTER(translate_fake_quantize_per_channel_affine_fx); OP_CONVERTER(translate_fake_quantize_per_tensor_affine_fx); @@ -264,6 +267,7 @@ OP_CONVERTER(translate_group_norm_fx); OP_CONVERTER(translate_index_fx); OP_CONVERTER(translate_layer_norm_fx); OP_CONVERTER(translate_leaky_relu_fx); +OP_CONVERTER(translate_log_sigmoid_fx); OP_CONVERTER(translate_log_softmax_fx); OP_CONVERTER(translate_max_dim_fx); OP_CONVERTER(translate_max_poolnd_fx); @@ -282,10 +286,12 @@ OP_CONVERTER(translate_select_scatter_fx); OP_CONVERTER(translate_slice_fx); OP_CONVERTER(translate_slice_scatter_fx); OP_CONVERTER(translate_softmax_fx); +OP_CONVERTER(translate_sort_fx); OP_CONVERTER(translate_split_with_sizes_fx); OP_CONVERTER(translate_stack_fx); OP_CONVERTER(translate_sub_fx); OP_CONVERTER(translate_sum_fx); +OP_CONVERTER(translate_topk_fx); OP_CONVERTER(translate_to_fx); OP_CONVERTER(translate_transpose_fx); OP_CONVERTER(translate_var_fx); @@ -710,6 +716,7 @@ const std::map get_supported_ops_fx() { {"aten._adaptive_avg_pool2d.default", op::translate_adaptive_avg_pool2d}, {"aten._adaptive_avg_pool3d.default", op::translate_adaptive_avg_pool3d}, {"aten._convolution.default", op::translate_convolution}, + {"aten._embedding_bag.default", op::translate_embedding_bag_fx}, {"aten._fake_quantize_per_tensor_affine_cachemask_tensor_qparams.default", op::translate_fake_quantize_per_tensor_affine_fx}, {"aten._local_scalar_dense.default", op::skip_node}, @@ -735,8 +742,11 @@ const std::map get_supported_ops_fx() { {"aten.addcmul.default", op::translate_addcmul_fx}, {"aten.addmm.default", op::translate_addmm_fx}, {"aten.alias.default", op::skip_node}, + {"aten.all.default", op::translate_all}, {"aten.amax.default", op::translate_amax}, {"aten.amin.default", op::translate_amin}, + {"aten.any.default", op::translate_any_fx}, + 
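// --- Editor's aside (illustration only; not part of the patch) ---------------
// translate_topk_fx above fills in the aten.topk defaults (dim=-1,
// largest=True, sorted=True), so a bare aten.topk(input, k) lowers to TopK in
// MAX mode with sorted values. Hedged sketch against the v3 TopK API:
#include <memory>
#include "openvino/op/topk.hpp"

std::shared_ptr<ov::Node> default_topk(const ov::Output<ov::Node>& input,
                                       const ov::Output<ov::Node>& k) {
    return std::make_shared<ov::op::v3::TopK>(input,
                                              k,
                                              /*axis=*/-1,
                                              ov::op::TopKMode::MAX,
                                              ov::op::TopKSortType::SORT_VALUES);
}
// ------------------------------------------------------------------------------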
{"aten.any.dim", op::translate_any_fx}, {"aten.arange.default", op::translate_arange_fx}, {"aten.arange.start", op::translate_arange_fx}, {"aten.arange.start_step", op::translate_arange_fx}, @@ -773,10 +783,13 @@ const std::map get_supported_ops_fx() { {"aten.cumsum.default", op::translate_cumsum_fx}, {"aten.channel_shuffle.default", op::translate_channel_shuffle}, {"aten.detach.default", op::skip_node}, + {"aten.detach_.default", op::skip_node}, {"aten.div.Scalar", op::translate_div_fx}, {"aten.div.Tensor", op::translate_div_fx}, {"aten.div.Tensor_mode", op::translate_div_fx}, + {"aten.div_.Tensor", op::translate_div_fx_}, {"aten.elu.default", op::translate_elu}, + {"aten.elu_.default", op::inplace_op}, {"aten.embedding.default", op::translate_embedding}, {"aten.empty.memory_format", op::translate_empty}, {"aten.eq.Scalar", op::translate_1to1_match_2_inputs_align_types}, @@ -788,7 +801,9 @@ const std::map get_supported_ops_fx() { {"aten.expand.default", op::translate_expand_fx}, {"aten.fake_quantize_per_channel_affine_cachemask.default", op::translate_fake_quantize_per_channel_affine_fx}, {"aten.fill.Scalar", op::translate_fill}, + {"aten.fill_.Scalar", op::inplace_op}, {"aten.fill.Tensor", op::translate_fill}, + {"aten.fill_.Tensor", op::inplace_op}, {"aten.flip.default", op::translate_flip}, {"aten.floor.default", op::translate_1to1_match_1_inputs}, {"aten.floor_divide.default", op::translate_floor_divide}, @@ -802,6 +817,7 @@ const std::map get_supported_ops_fx() { {"aten.ge.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.gelu.default", op::translate_gelu_fx}, {"aten.glu.default", op::translate_glu}, + {"aten.grid_sampler_2d.default", op::translate_grid_sampler}, {"aten.gt.Scalar", op::translate_1to1_match_2_inputs_align_types}, {"aten.gt.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.hardsigmoid.default", op::translate_1to1_match_1_inputs}, @@ -811,6 +827,9 @@ const std::map get_supported_ops_fx() { {"aten.hardtanh_.default", op::inplace_op}, {"aten.index.Tensor", op::translate_index_fx}, {"aten.index_select.default", op::translate_index_select}, + {"aten.isfinite.default", op::inplace_op>}, + {"aten.isinf.default", op::inplace_op>}, + {"aten.isnan.default", op::inplace_op>}, {"aten.le.Scalar", op::translate_1to1_match_2_inputs_align_types}, {"aten.le.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.leaky_relu.default", op::translate_leaky_relu_fx}, @@ -818,15 +837,17 @@ const std::map get_supported_ops_fx() { {"aten.lift_fresh_copy.default", op::skip_node}, {"aten.linalg_vector_norm.default", op::translate_linalg_vector_norm}, {"aten.log.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, - {"aten.log_sigmoid_forward.default", op::translate_log_sigmoid}, + {"aten.log_sigmoid_forward.default", op::translate_log_sigmoid_fx}, {"aten.log10.default", op::translate_log10}, {"aten.log1p.default", op::translate_log1p}, {"aten.log2.default", op::translate_log2}, {"aten.logsumexp.default", op::translate_logsumexp}, {"aten.lt.Scalar", op::translate_1to1_match_2_inputs_align_types}, {"aten.lt.Tensor", op::translate_1to1_match_2_inputs_align_types}, + {"aten.masked_fill.Scalar", op::translate_masked_fill}, {"aten.masked_fill.Tensor", op::translate_masked_fill}, {"aten.masked_fill_.Scalar", op::inplace_op}, + {"aten.masked_fill_.Tensor", op::inplace_op}, {"aten.max.default", op::translate_max}, {"aten.max.dim", op::translate_max_dim_fx}, {"aten.max_pool2d_with_indices.default", op::translate_max_poolnd_fx}, @@ -872,6 +893,7 @@ 
const std::map get_supported_ops_fx() { {"aten.rsub.Scalar", op::translate_rsub_fx}, {"aten.rsub.Tensor", op::translate_rsub_fx}, {"aten.scalar_tensor.default", op::translate_scalar_tensor_fx}, + {"aten.scatter.src", op::translate_scatter}, {"aten.scatter.value", op::translate_scatter}, {"aten.select.int", op::translate_select}, {"aten.select_scatter.default", op::translate_select_scatter_fx}, @@ -883,6 +905,7 @@ const std::map get_supported_ops_fx() { {"aten.sinh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.slice.Tensor", op::translate_slice_fx}, {"aten.slice_scatter.default", op::translate_slice_scatter_fx}, + {"aten.sort.default", op::translate_sort_fx}, {"aten.split.Tensor", op::translate_chunk_fx}, {"aten.split_with_sizes.default", op::translate_split_with_sizes_fx}, {"aten.sqrt.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, @@ -896,7 +919,9 @@ const std::map get_supported_ops_fx() { {"aten.t.default", op::translate_t}, {"aten.tan.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.tanh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.topk.default", op::translate_topk_fx}, {"aten.transpose.int", op::translate_transpose}, + {"aten.tril.default", op::translate_tril}, {"aten.unbind.int", op::translate_unbind_int_fx}, {"aten.unfold.default", op::translate_unfold}, {"aten.unsqueeze.default", op::translate_1to1_match_2_inputs}, @@ -909,20 +934,8 @@ const std::map get_supported_ops_fx() { {"aten.zeros.names", op::translate_zeros_fx}, {"aten.zeros_like.default", op::translate_zeros_like_fx}, {"get_attr", op::translate_constant}, - {"prim::Constant", op::translate_constant}, - {"prim::device", op::translate_constant}, - {"prim::GetAttr", op::translate_get_attr}, - {"prim::If", op::translate_if}, - {"prim::is_cuda", op::return_false_scalar}, - {"prim::ListConstruct", op::translate_list_construct}, - {"prim::Loop", op::translate_loop}, - {"prim::NumToTensor", op::skip_node}, // In openvino we already store number as tensor with shape [] - {"prim::PythonOp", op::translate_pythonop}, - {"prim::requires_grad", op::return_false_scalar}, - {"prim::type", op::skip_node}, // Used with prim::device, pass PtFrameworkNode. 
- {"torchvision::deform_conv2d", op::translate_deform_conv}, - {"torchvision::nms", op::translate_nms}, - {"torchvision::roi_align", op::translate_roi_align}, + {"torchvision.deform_conv2d.default", op::translate_deform_conv}, + {"torchvision.roi_align.default", op::translate_roi_align}, }; }; diff --git a/src/frontends/tensorflow_common/src/op/ones_like.cpp b/src/frontends/tensorflow_common/src/op/ones_like.cpp index 7e6a904dcf247a..6003f26ca3e34c 100644 --- a/src/frontends/tensorflow_common/src/op/ones_like.cpp +++ b/src/frontends/tensorflow_common/src/op/ones_like.cpp @@ -3,9 +3,11 @@ // #include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/squeeze.hpp" #include "utils.hpp" @@ -19,8 +21,28 @@ namespace tensorflow { namespace op { OutputVector translate_ones_like_op(const NodeContext& node) { - default_op_checks(node, 1, {"OnesLike"}); + default_op_checks(node, 1, {"OnesLike"}, true); auto x = node.get_input(0); + auto complex_type_mark_x = as_type_ptr(x.get_node_shared_ptr()); + if (complex_type_mark_x) { + x = complex_type_mark_x->input_value(0); + auto gather_index_real = make_shared(element::i32, Shape{1}, 0); + auto minus_one = make_shared(element::i32, Shape{1}, -1); + auto x_real = make_shared(x, gather_index_real, minus_one)->output(0); + Output shape_of_real = make_shared(x_real, element::i32); + + auto one_const = create_same_type_const_scalar(x_real, 1); + Output ones_like = make_shared(one_const, shape_of_real); + + auto zero_const = create_same_type_const_scalar(x_real, 0); + Output zeros_like = make_shared(zero_const, shape_of_real); + auto result = make_shared(OutputVector{ones_like, zeros_like}, -1); + set_node_name(node.get_name(), result); + auto ones_like_complex = make_shared(result, complex_type_mark_x->get_complex_part_type()); + + return {ones_like_complex}; + } + Output shape_of = make_shared(x, element::i32); auto one_const = create_same_type_const_scalar(x, 1); @@ -35,11 +57,9 @@ OutputVector translate_ones_like_op(const NodeContext& node) { // remove extra dimension by squeezing auto zero_dim_ind = make_shared(element::i32, Shape{1}, 0); ones_like = make_shared(ones_like, zero_dim_ind); - set_node_name(node.get_name(), ones_like.get_node_shared_ptr()); return {ones_like}; } - } // namespace op } // namespace tensorflow } // namespace frontend diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index b9e4afefa6fcb3..2ddd8702eb87fd 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -399,6 +399,52 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type) */ static constexpr Property scheduling_core_type{"SCHEDULING_CORE_TYPE"}; +enum class ModelDistributionPolicy { + TENSOR_PARALLEL = 0, // Split tensor into several parts and distribute them between sockets/devices during model + // compilation. At inference time sockets/devices process tensors in parallel and do + // syncronization at the end ensuring mathematical correctness. 
+}; + +/** @cond INTERNAL */ +inline std::ostream& operator<<(std::ostream& os, const ModelDistributionPolicy& stream_mode) { + switch (stream_mode) { + case ModelDistributionPolicy::TENSOR_PARALLEL: + return os << "TENSOR_PARALLEL"; + default: + OPENVINO_THROW("Unsupported model distribution policy!"); + } +} + +inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& stream_mode) { + std::string str; + is >> str; + if (str == "TENSOR_PARALLEL") { + stream_mode = ModelDistributionPolicy::TENSOR_PARALLEL; + } else { + OPENVINO_THROW("Unsupported model distribution policy: ", str); + } + return is; +} +/** @endcond */ + +/** + * @brief This property defines model distribution policy for inference with multiple sockets/devices. + * @ingroup ov_runtime_cpp_prop_api + * + * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA + * nodes or between different GPUs). + * -- TENSOR_PARALLEL : Split tensor into several parts and distribute them between sockets/devices during model + * compilation. At inference time sockets/devices process tensors in parallel and do synchronization + * at the end ensuring mathematical correctness. + * + * The following code is an example of how the TENSOR_PARALLEL model distribution policy might be enabled. + * + * @code + * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL})); + * @endcode + */ +static constexpr Property> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"}; + /** * @brief This property allows CPU pinning during inference. * @ingroup ov_runtime_cpp_prop_api diff --git a/src/inference/tests/functional/ov_register_plugin_test.cpp b/src/inference/tests/functional/ov_register_plugin_test.cpp index a3830743efb919..c494eb14e4d9a0 100644 --- a/src/inference/tests/functional/ov_register_plugin_test.cpp +++ b/src/inference/tests/functional/ov_register_plugin_test.cpp @@ -52,7 +52,9 @@ TEST(RegisterPluginTests, getVersionforRegisteredPluginThrows) { TEST(RegisterPluginTests, getVersionforNoRegisteredPluginNoThrows) { ov::Core core; - ASSERT_NO_THROW(core.get_versions("unkown_device")); + std::map versions; + ASSERT_NO_THROW(versions = core.get_versions("unkown_device")); + ASSERT_TRUE(versions.empty()); auto plugin = std::make_shared>(); diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 8efd078836275c..70da87819f03e5 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -176,7 +176,7 @@ cross_compiled_file(${TARGET_NAME} ARCH AVX512F AVX2 ANY src/nodes/kernels/scaled_attn/attn_memcpy.cpp API src/nodes/kernels/scaled_attn/attn_memcpy.hpp - NAME attn_memcpy + NAME attn_memcpy paged_attn_memcpy NAMESPACE ov::Extensions::Cpu::XARCH ) cross_compiled_file(${TARGET_NAME} diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 6a035444f5844b..039b96c70f824c 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -193,6 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), + RO_property(ov::hint::model_distribution_policy.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), 
RO_property(ov::intel_cpu::denormals_optimization.name()), @@ -246,8 +247,11 @@ ov::Any CompiledModel::get_property(const std::string& name) const { const bool use_pin = config.enableCpuPinning; return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin); } else if (name == ov::hint::scheduling_core_type) { - const auto core_type = config.schedulingCoreType; - return core_type; + const auto stream_mode = config.schedulingCoreType; + return stream_mode; + } else if (name == ov::hint::model_distribution_policy) { + const auto distribution_policy = config.modelDistributionPolicy; + return distribution_policy; } else if (name == ov::hint::enable_hyper_threading.name()) { const bool use_ht = config.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(use_ht); diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index cb5d4139b14e98..8567914415e459 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -184,12 +184,26 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { val.as(), "for property key ", ov::hint::scheduling_core_type.name(), - ". Expected only ", - ov::hint::SchedulingCoreType::ANY_CORE, - '/', - ov::hint::SchedulingCoreType::PCORE_ONLY, - '/', - ov::hint::SchedulingCoreType::ECORE_ONLY); + ". Expected only ov::hint::SchedulingCoreType::ANY_CORE/PCORE_ONLY/ECORE_ONLY"); + } + } else if (key == ov::hint::model_distribution_policy.name()) { + auto error_info = [&]() { + OPENVINO_THROW("Wrong value ", + val.as(), + "for property key ", + ov::hint::model_distribution_policy.name(), + ". CPU plugin only support {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}"); + }; + + try { + for (auto& row : val.as>()) { + if ((row != ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL)) { + error_info(); + } + } + modelDistributionPolicy = val.as>(); + } catch (ov::Exception&) { + error_info(); } } else if (key == ov::hint::enable_hyper_threading.name()) { try { diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index f1c79bc046179d..10d7274dc66f7c 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -76,6 +76,7 @@ struct Config { bool enableCpuPinning = true; bool changedCpuPinning = false; ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE; + std::set modelDistributionPolicy = {}; bool enableHyperThreading = true; bool changedHyperThreading = false; Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET; diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 15a4edb5392bab..629fc5b0db2466 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -217,6 +217,7 @@ static const TypeToNameMap& get_type_to_name_tbl() { {"Ngram", Type::Ngram}, {"ScaledDotProductAttention", Type::ScaledDotProductAttention}, {"ScaledDotProductAttentionWithKVCache", Type::ScaledDotProductAttention}, + {"PagedAttentionExtension", Type::ScaledDotProductAttention}, {"RoPE", Type::RoPE}, }; return type_to_name_tbl; diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 5fbe24b4f2b637..244d80038219b3 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ 
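// --- Editor's aside (illustration only; not part of the patch) ---------------
// With the Config::readProperties validation above, the CPU plugin accepts
// only TENSOR_PARALLEL in the policy set; any other value trips error_info().
// Hedged usage sketch mirroring the property documentation added earlier:
#include "openvino/runtime/core.hpp"

void enable_tensor_parallel() {
    ov::Core core;
    core.set_property("CPU",
                      ov::hint::model_distribution_policy(
                          {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
}
// ------------------------------------------------------------------------------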
b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -172,6 +172,57 @@ std::set> jit_divide_emitter::get_supported_precision return {{element::f32, element::f32}}; } +/// EQUAL /// +jit_equal_emitter::jit_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); +} +jit_equal_emitter::jit_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); +} + +size_t jit_equal_emitter::get_inputs_count() const { return 2; } + +size_t jit_equal_emitter::get_aux_vecs_count() const { return 1; } + +size_t jit_equal_emitter::get_aux_gprs_count() const { return 1; } + +std::set> jit_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_equal_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src1 = TReg(in_vec_idxs[0]); + const TReg src2 = TReg(in_vec_idxs[1]); + const TReg dst = TReg(out_vec_idxs[0]); + const TReg aux = TReg(aux_vec_idxs[0]); + + h->fcmeq(dst.s, src1.s, src2.s); + + h->ld1r(aux.s, table_val2("one")); + h->and_(dst.b16, dst.b16, aux.b16); +} + +void jit_equal_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); +} /// MUL_ADD /// jit_mul_add_emitter::jit_mul_add_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, @@ -495,6 +546,52 @@ void jit_relu_emitter::emit_isa(const std::vector &in_vec_idxs, const st h->fmaxnm(dst.s, src.s, tmp.s); } +/// SELECT /// +jit_select_emitter::jit_select_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, get_arithmetic_binary_exec_precision(node)) { +} +jit_select_emitter::jit_select_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { +} + +size_t jit_select_emitter::get_inputs_count() const { return 3; } + +size_t jit_select_emitter::get_aux_vecs_count() const { return 1; } + +std::set> jit_select_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32, element::f32}}; +} + +void jit_select_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_select_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg 
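// --- Editor's aside (illustration only; not part of the patch) ---------------
// Scalar model of the NEON sequence in jit_equal_emitter::emit_isa above:
// fcmeq writes an all-ones lane mask on equality, and AND-ing that mask with
// the bit pattern of 1.0f (the 0x3f800000 "one" table entry) materializes the
// 1.0f / 0.0f result the eltwise framework expects.
#include <cstdint>
#include <cstring>

float equal_lane(float a, float b) {
    uint32_t mask = (a == b) ? 0xFFFFFFFFu : 0u;  // fcmeq
    uint32_t bits = mask & 0x3f800000u;           // and_ with the "one" table entry
    float out;
    std::memcpy(&out, &bits, sizeof(out));        // reinterpret, as in the register
    return out;
}
// ------------------------------------------------------------------------------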
= typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src1 = TReg(in_vec_idxs[0]); + const TReg src2 = TReg(in_vec_idxs[1]); + const TReg src3 = TReg(in_vec_idxs[2]); + const TReg dst = TReg(out_vec_idxs[0]); + const TReg aux = TReg(aux_vec_idxs[0]); + + h->eor(aux.b16, aux.b16, aux.b16); + h->fcmgt(aux.s, src1.s, aux.s); + + h->bsl(aux.b16, src2.b16, src3.b16); + h->mov(dst.b16, aux.b16); +} + /// SUBTRACT /// jit_subtract_emitter::jit_subtract_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index 80e1c2ed7e9c42..58184933e3e1a7 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -84,6 +84,33 @@ class jit_divide_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_equal_emitter : public jit_emitter { +public: + jit_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& n); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + static std::set> get_supported_precisions( + const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + + void register_table_entries() override; +}; class jit_mul_add_emitter : public jit_emitter { public: @@ -209,6 +236,30 @@ class jit_relu_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_select_emitter : public jit_emitter { +public: + jit_select_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_select_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& n); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + static std::set> get_supported_precisions( + const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + class jit_subtract_emitter : public jit_emitter { public: jit_subtract_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index e97c51d4322b73..3ffff01c6da6e7 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1680,6 +1680,10 @@ void Graph::EnforceInferencePrecision() { if (node->getOriginalInputPrecisionAtPort(inPort) != ov::element::f32) return true; + // kvcache of PagedAttention should be written directly + if (node->getType() == Type::ScaledDotProductAttention 
&& node->getOriginalInputsNumber() == 13 && + (inPort == 3 || inPort == 4)) + return true; const auto &parent = node->getParentEdgeAt(inPort)->getParent(); /* Skip BF16 enforcement for nodes after Constant Inputs for maintaining precision for fusing. * Element type conversion to bf16 is done automatically, if convolution follows up after Constant Inputs diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 5965a1e2d4996a..3ddf37b8d6a5c9 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -164,6 +164,15 @@ void Concat::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); if (itr->first != LayoutType::nspc) { pdIndexesToReuse.push_back(supportedPrimitiveDescriptors.size() - 1); + } else if (canBeInPlace) { + // canBeInPlace means all dims before axis are 1, so for nspc layout we only need check sp dimensions in + // axis=1 cases here + const auto& childDims = outputShapes[0].getDims(); + if (axis != 1 || std::all_of(childDims.crbegin(), childDims.crend() - 2, [](const Dim dim) { + return 1 == dim; + })) { + pdIndexesToReuse.push_back(supportedPrimitiveDescriptors.size() - 1); + } } } diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 94c287bb23e15d..112c156652e9f8 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -21,11 +21,13 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseAdd, Algorithm::EltwiseClamp, Algorithm::EltwiseDivide, + Algorithm::EltwiseEqual, Algorithm::EltwiseMultiply, Algorithm::EltwiseMulAdd, Algorithm::EltwisePowerStatic, Algorithm::EltwisePrelu, Algorithm::EltwiseRelu, + Algorithm::EltwiseSelect, Algorithm::EltwiseSubtract); if (!is_supported) { return false; @@ -63,7 +65,13 @@ bool JitEltwiseExecutor::isSupported( // Divide operation doesn't support int32 tensor inference in fp32 precision. // As result Divide operation supports fp16 and fp32 only. 
std::set { ov::element::f16, ov::element::f32 } : - std::set { ov::element::f16, ov::element::f32, ov::element::i32 }; + std::set { + ov::element::f16, + ov::element::f32, + ov::element::i32, + ov::element::i8, + ov::element::u8 + }; if (!check_precisions(input_precisions, output_precisions, supported_precisions)) { return false; diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 471cb5e853ec7e..1954a65317fde7 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -272,6 +272,32 @@ void jit_uni_eltwise_generic::generate() { } } +namespace utils { +template +void load_vector(const T1& data_lane, + const T2& data_lanes, + const Xbyak_aarch64::XReg &ptr_reg, + const int64_t offset, + const bool broadcast, + jit_generator* h) { + if (broadcast) { + if (offset == 0) { + h->ld1r(data_lane, ptr(ptr_reg)); + } else { + h->add_imm(h->X_DEFAULT_ADDR, ptr_reg, offset, h->X_TMP_0); + h->ld1r(data_lane, ptr(h->X_DEFAULT_ADDR)); + } + } else { + if (offset == 0) { + h->ld1(data_lanes, ptr(ptr_reg)); + } else { + h->add_imm(h->X_DEFAULT_ADDR, ptr_reg, offset, h->X_TMP_0); + h->ld1(data_lanes, ptr(h->X_DEFAULT_ADDR)); + } + } +} +} // namespace utils + template void jit_uni_eltwise_generic::load_vector(const TReg& data, const XReg& ptr_reg, @@ -281,16 +307,7 @@ void jit_uni_eltwise_generic::load_vector(const TReg& data, const int32_t ptr_offset) { switch (src_prc) { case ov::element::f16: { - if (broadcast) { - if (ptr_offset == 0) { - ld1r(data.h, ptr(ptr_reg)); - } else { - add_imm(ptr_reg, ptr_reg, ptr_offset, X_DEFAULT_ADDR); - ld1r(data.h, ptr(ptr_reg)); - } - } else { - ldr(Xbyak_aarch64::DReg(data.getIdx()), Xbyak_aarch64::ptr(ptr_reg, ptr_offset)); - } + utils::load_vector(data.h, data.h4, ptr_reg, ptr_offset, broadcast, this); break; } case ov::element::f32: @@ -302,6 +319,18 @@ void jit_uni_eltwise_generic::load_vector(const TReg& data, } break; } + case ov::element::i8: { + utils::load_vector(data.b, data.s, ptr_reg, ptr_offset, broadcast, this); + sshll(data.h8, data.b8, 0); + sshll(data.s4, data.h4, 0); + break; + } + case ov::element::u8: { + utils::load_vector(data.b, data.s, ptr_reg, ptr_offset, broadcast, this); + ushll(data.h8, data.b8, 0); + ushll(data.s4, data.h4, 0); + break; + } default: { OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string()); } @@ -319,6 +348,14 @@ void jit_uni_eltwise_generic::load_vector(const TReg& data, scvtf(data.s, data.s); break; } + case ov::element::i8: { + scvtf(data.s, data.s); + break; + } + case ov::element::u8: { + ucvtf(data.s, data.s); + break; + } default: OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string()); } @@ -345,6 +382,24 @@ void jit_uni_eltwise_generic::load_scalar(const SReg& data, ldr(data, Xbyak_aarch64::ptr(ptr, ptr_offset)); break; } + case ov::element::i8: { + ldr(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset)); + + // scalar is loaded, operates with vector + TReg vec(data.getIdx()); + sshll(vec.h8, vec.b8, 0); + sshll(vec.s4, vec.h4, 0); + break; + } + case ov::element::u8: { + ldr(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset)); + + // scalar is loaded, operates with vector + TReg vec(data.getIdx()); + ushll(vec.h8, vec.b8, 0); + ushll(vec.s4, 
vec.h4, 0); + break; + } default: { OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string()); } @@ -358,10 +413,15 @@ void jit_uni_eltwise_generic::load_scalar(const SReg& data, fcvt(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::HReg(data.getIdx())); break; } - case ov::element::i32: { + case ov::element::i32: + case ov::element::i8: { scvtf(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::SReg(data.getIdx())); break; } + case ov::element::u8: { + ucvtf(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::SReg(data.getIdx())); + break; + } default: OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string()); } @@ -390,6 +450,18 @@ void jit_uni_eltwise_generic::store_vector(const XReg& ptr, fcvtns(data.s, data.s); break; } + case ov::element::i8: { + fcvtns(data.s, data.s); + xtn(data.h4, data.s4); + xtn(data.b8, data.h8); + break; + } + case ov::element::u8: { + fcvtnu(data.s, data.s); + xtn(data.h4, data.s4); + xtn(data.b8, data.h8); + break; + } default: { OPENVINO_THROW("dst_prc " + dst_prc.to_string() + " is not supported, src_prc is " + src_prc.to_string()); } @@ -412,6 +484,11 @@ void jit_uni_eltwise_generic::store_vector(const XReg& ptr, str(Xbyak_aarch64::QReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset)); break; } + case ov::element::i8: + case ov::element::u8: { + str(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset)); + break; + } default: { OPENVINO_THROW("dst_prc " + dst_prc.to_string() + " is not supported, src_ptr is " + src_prc.to_string()); } @@ -436,6 +513,20 @@ void jit_uni_eltwise_generic::store_scalar(const XReg& ptr, fcvtns(data, data); break; } + case ov::element::i8: { + TReg vec_data(data.getIdx()); + fcvtns(vec_data.s, vec_data.s); + xtn(vec_data.h4, vec_data.s4); + xtn(vec_data.b8, vec_data.h8); + break; + } + case ov::element::u8: { + TReg vec_data(data.getIdx()); + fcvtnu(vec_data.s, vec_data.s); + xtn(vec_data.h4, vec_data.s4); + xtn(vec_data.b8, vec_data.h8); + break; + } default: { OPENVINO_THROW("dst_prc " + dst_prc.to_string() + " is not supported, src_prc is " + src_prc.to_string()); } @@ -458,6 +549,11 @@ void jit_uni_eltwise_generic::store_scalar(const XReg& ptr, str(data, Xbyak_aarch64::ptr(ptr, ptr_offset)); break; } + case ov::element::i8: + case ov::element::u8: { + str(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset)); + break; + } default: { OPENVINO_THROW("dst_prc " + src_prc.to_string() + " is not supported, src_prc is " + src_prc.to_string()); } @@ -516,11 +612,13 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseAdd, ov::intel_cpu::aarch64::jit_add_emitter), OV_CASE(Algorithm::EltwiseClamp, ov::intel_cpu::aarch64::jit_clamp_emitter), OV_CASE(Algorithm::EltwiseDivide, ov::intel_cpu::aarch64::jit_divide_emitter), + OV_CASE(Algorithm::EltwiseEqual, ov::intel_cpu::aarch64::jit_equal_emitter), OV_CASE(Algorithm::EltwiseMulAdd, ov::intel_cpu::aarch64::jit_mul_add_emitter), OV_CASE(Algorithm::EltwiseMultiply, ov::intel_cpu::aarch64::jit_multiply_emitter), OV_CASE(Algorithm::EltwisePowerStatic, ov::intel_cpu::aarch64::jit_power_static_emitter), OV_CASE(Algorithm::EltwisePrelu, ov::intel_cpu::aarch64::jit_prelu_emitter), OV_CASE(Algorithm::EltwiseRelu, ov::intel_cpu::aarch64::jit_relu_emitter), + OV_CASE(Algorithm::EltwiseSelect, ov::intel_cpu::aarch64::jit_select_emitter), OV_CASE(Algorithm::EltwiseSubtract, ov::intel_cpu::aarch64::jit_subtract_emitter)); if 
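// --- Editor's aside (illustration only; not part of the patch) ---------------
// Scalar model of the i8/u8 paths added above: loads widen 8-bit lanes to
// 32-bit (sshll/ushll) and convert to f32 (scvtf/ucvtf); stores convert back
// with round-to-nearest (fcvtns/fcvtnu) and narrow again (xtn). The xtn
// narrowing truncates rather than saturates, which this model mirrors.
#include <cmath>
#include <cstdint>

float load_i8(int8_t v) { return static_cast<float>(v); }                   // sshll + scvtf
float load_u8(uint8_t v) { return static_cast<float>(v); }                  // ushll + ucvtf
int8_t store_i8(float v) { return static_cast<int8_t>(std::lrintf(v)); }    // fcvtns + xtn
uint8_t store_u8(float v) { return static_cast<uint8_t>(std::lrintf(v)); }  // fcvtnu + xtn
// ------------------------------------------------------------------------------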
(!ctx.emitter) @@ -670,10 +768,12 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), OV_CASE(Algorithm::EltwiseClamp, jit_clamp_emitter), OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), + OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter), OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter)); if (precisions.empty()) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp index 08d9635da9ffd9..c170464eeb47ee 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp @@ -76,6 +76,43 @@ static void attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, }); } +template +static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, + const ov::intel_cpu::PlainTensor& v_input, + const ov::intel_cpu::PlainTensor& past_k_output, + const ov::intel_cpu::PlainTensor& past_v_output, + const ov::intel_cpu::PlainTensor& slot_mapping) { + size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3]; + parallel_for3d(B, H, L1, [&](size_t b, size_t h, size_t m) { + auto block_idx = slot_mapping.ptr(b)[m]; + if (block_idx < 0) return; + attn_copy(past_k_output.ptr(block_idx, h, 0), + k_input.ptr(b, h, m, 0), + S); + attn_copy(past_v_output.ptr(block_idx, h, 0), + v_input.ptr(b, h, m, 0), + S); + }); +} + +static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, + const ov::intel_cpu::PlainTensor& v_input, + const ov::intel_cpu::PlainTensor& past_k_output, + const ov::intel_cpu::PlainTensor& past_v_output, + const ov::intel_cpu::PlainTensor& slot_mapping) { + size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3]; + parallel_for3d(B, H, L1, [&](size_t b, size_t h, size_t m) { + auto block_idx = slot_mapping.ptr(b)[m]; + if (block_idx < 0) return; + std::memcpy(past_k_output.ptr_v(block_idx, h, 0), + k_input.ptr_v(b, h, m, 0), + S * k_input.m_element_size); + std::memcpy(past_v_output.ptr_v(block_idx, h, 0), + v_input.ptr_v(b, h, m, 0), + S * v_input.m_element_size); + }); +} + void attn_memcpy(const ov::intel_cpu::PlainTensor& k_input, const ov::intel_cpu::PlainTensor& v_input, const ov::intel_cpu::PlainTensor& past_k_output, @@ -90,6 +127,23 @@ void attn_memcpy(const ov::intel_cpu::PlainTensor& k_input, OPENVINO_THROW("unsupport src type: ", k_input.get_precision(), ", dst type: ", past_k_output.get_precision(), " in attn_memcpy"); } } + +void paged_attn_memcpy(const ov::intel_cpu::PlainTensor& k_input, + const ov::intel_cpu::PlainTensor& v_input, + const ov::intel_cpu::PlainTensor& past_k_output, + const ov::intel_cpu::PlainTensor& past_v_output, + const ov::intel_cpu::PlainTensor& slot_mapping) { + if (past_k_output.get_precision() == k_input.get_precision()) { + paged_attn_memcpy_kernel(k_input, v_input, past_k_output, past_v_output, slot_mapping); + } else if (k_input.get_precision() == ov::element::f32 && past_k_output.get_precision() == ov::element::f16) { + paged_attn_memcpy_kernel(k_input, v_input, past_k_output, past_v_output, 
slot_mapping); + } else if (k_input.get_precision() == ov::element::f32 && past_k_output.get_precision() == ov::element::bf16) { + paged_attn_memcpy_kernel(k_input, v_input, past_k_output, past_v_output, slot_mapping); + } else { + OPENVINO_THROW("unsupport src type: ", k_input.get_precision(), ", dst type: ", past_k_output.get_precision(), " in paged_attn_memcpy"); + } +} + } // namespace XARCH } // namespace Cpu } // namespace Extensions diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.hpp index 68bf517475888b..2c44534a8462d7 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.hpp @@ -20,6 +20,12 @@ void attn_memcpy(const ov::intel_cpu::PlainTensor& k_input, const ov::intel_cpu::PlainTensor& past_k_output, const ov::intel_cpu::PlainTensor& past_v_output); +void paged_attn_memcpy(const ov::intel_cpu::PlainTensor& k_input, + const ov::intel_cpu::PlainTensor& v_input, + const ov::intel_cpu::PlainTensor& past_k_output, + const ov::intel_cpu::PlainTensor& past_v_output, + const ov::intel_cpu::PlainTensor& slot_mapping); + } // namespace XARCH } // namespace Cpu } // namespace Extensions diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index 3121a5852d19da..d16f85f154b685 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -594,6 +594,7 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, const ov::intel_cpu::PlainTensor& alibi_mask, const ov::intel_cpu::PlainTensor& attention_mask, const ov::intel_cpu::PlainTensor& beams, + const ov::intel_cpu::PlainTensor& context_lens, ov::intel_cpu::PlainTensor& output_emb, ov::intel_cpu::PlainTensor& buf_attn_w, ov::intel_cpu::PlainTensor& buf_attn_score, @@ -609,15 +610,17 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, auto H = query.size(1); auto q_len = query.size(2); auto S = query.size(3); - auto kv_len = present_key.size(2); auto h_group_num = present_key.size(1); size_t h_each_group_len = 1; + bool is_pagedattn = context_lens; if (h_group_num != H) { h_each_group_len = H / h_group_num; } if (d_scale == 0.0f) d_scale = 1.0f / sqrt(S); auto nthr = parallel_get_max_threads(); + // max kv len + auto kv_len = beams.size(1); // use per-token kernel, for each k,v token // attn mask is a matrix of q_len(kv_len) @@ -642,53 +645,79 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, size_t b, h_group, pk; if (start < end) { parallel_it_init(start, b, B, h_group, h_group_num, pk, kv_len); - if (q_len == 1 && h_each_group_len == 1) { - if (B == 1) { - // the memory will be continuous when b==1 - for (size_t iwork = start; iwork < end; ++iwork) { - auto p = past_k_scale_zp.ptr(0, h_group, pk); - auto p_k = present_key.ptr(0, h_group, pk); - prefetch_bytes(S, _MM_HINT_T0, 4096, p_k); - buf_attn_w.ptr(0, h_group, 0)[pk] = - dot_product(query.ptr(0, h_group), p_k, - S, p, p + 1, head_sum.ptr(0, h_group)); - parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); + if (is_pagedattn) { + for (size_t iwork = start; iwork < end; ++iwork) { + auto context_len = static_cast(context_lens.ptr()[b]); + // kv_len must be valid + if (pk < context_len) { + auto block_idx 
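The precision dispatch completed here follows a common copy policy: identical precisions take the raw memcpy route, mixed precisions fall back to an element-wise convert. A compile-time sketch of that policy; the real code dispatches on runtime ov::element::Type and delegates to attn_copy:

```cpp
#include <cstddef>
#include <cstring>
#include <type_traits>

// Same-type rows are copied bytewise; mixed-type rows convert per element.
template <typename TDst, typename TSrc>
void copy_row(TDst* dst, const TSrc* src, size_t n) {
    if constexpr (std::is_same_v<TDst, TSrc>) {
        std::memcpy(dst, src, n * sizeof(TSrc));
    } else {
        for (size_t i = 0; i < n; ++i)
            dst[i] = static_cast<TDst>(src[i]); // e.g. float -> half-precision
    }
}
```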
= beams.ptr(b)[pk]; + OPENVINO_ASSERT(block_idx >= 0, "block idx must be greater or equal than 0"); + + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + buf_attn_w.ptr(b, h, pq)[pk] = + dot_product(query.ptr(b, h, pq), present_key.ptr(block_idx, h_group), + S, nullptr, nullptr, nullptr); + } + } + } + parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); + } + } else { + if (q_len == 1 && h_each_group_len == 1) { + if (B == 1) { + // the memory will be continuous when b==1 + for (size_t iwork = start; iwork < end; ++iwork) { + auto p = past_k_scale_zp.ptr(0, h_group, pk); + auto p_k = present_key.ptr(0, h_group, pk); + prefetch_bytes(S, _MM_HINT_T0, 4096, p_k); + buf_attn_w.ptr(0, h_group, 0)[pk] = + dot_product(query.ptr(0, h_group), p_k, + S, p, p + 1, head_sum.ptr(0, h_group)); + parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); + } + } else { + for (size_t iwork = start; iwork < end; ++iwork) { + auto b_kv = beams ? beams.ptr(b)[pk] : b; + auto p = past_k_scale_zp.ptr(b_kv, h_group, pk); + auto p_k = present_key.ptr(b_kv, h_group, pk); + buf_attn_w.ptr(b, h_group, 0)[pk] = + dot_product(query.ptr(b, h_group), p_k, + S, p, p + 1, head_sum.ptr(b, h_group)); + parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); + } } } else { for (size_t iwork = start; iwork < end; ++iwork) { auto b_kv = beams ? beams.ptr(b)[pk] : b; - auto p = past_k_scale_zp.ptr(b_kv, h_group, pk); - auto p_k = present_key.ptr(b_kv, h_group, pk); - buf_attn_w.ptr(b, h_group, 0)[pk] = - dot_product(query.ptr(b, h_group), p_k, - S, p, p + 1, head_sum.ptr(b, h_group)); - parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); - } - } - } else { - for (size_t iwork = start; iwork < end; ++iwork) { - auto b_kv = beams ? beams.ptr(b)[pk] : b; - for (size_t pq = 0; pq < q_len; pq++) { - auto p = past_k_scale_zp.ptr(b_kv, h_group, pk); - for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { - buf_attn_w.ptr(b, h, pq)[pk] = - dot_product(query.ptr(b, h, pq), present_key.ptr(b_kv, h_group, pk), - S, p, p + 1, head_sum.ptr(b, h, pq)); + for (size_t pq = 0; pq < q_len; pq++) { + auto p = past_k_scale_zp.ptr(b_kv, h_group, pk); + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + buf_attn_w.ptr(b, h, pq)[pk] = + dot_product(query.ptr(b, h, pq), present_key.ptr(b_kv, h_group, pk), + S, p, p + 1, head_sum.ptr(b, h, pq)); + } } + parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); } - parallel_it_step(b, B, h_group, h_group_num, pk, kv_len); } } } }); parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { + auto cur_kv_len = kv_len; + auto ncausal = auto_causal ? (cur_kv_len - q_len + pq + 1) : cur_kv_len; + if (is_pagedattn) { + cur_kv_len = static_cast(context_lens.ptr()[b]); + ncausal = cur_kv_len; + } // apply attention mask & sofmax - auto ncausal = auto_causal ? (kv_len - q_len + pq + 1) : kv_len; float* alibi_ptr = alibi_mask ? &alibi_mask.at({b, h, pq, 0}, true) : nullptr; uint8_t* attn_mask_ptr = nullptr; auto attn_mask_prec = attention_mask.get_precision(); - attn_mask_ptr = reinterpret_cast(&attention_mask.at({b, h, pq, 0}, true)); + if (attention_mask) + attn_mask_ptr = reinterpret_cast(&attention_mask.at({b, h, pq, 0}, true)); uint8_t* cmask_ptr = causal_mask ? 
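The paged QK loop that continues here translates each logical kv position through the per-batch block table (beams is reused as block indices) and skips positions beyond that batch's context length. A minimal single-head sketch under those assumptions, with a hypothetical raw-pointer layout:

```cpp
#include <cstddef>

float dot(const float* a, const float* b, size_t n) {
    float s = 0.0f;
    for (size_t i = 0; i < n; ++i)
        s += a[i] * b[i];
    return s;
}

// One (batch, head) slice of the paged QK pass: logical position pk is mapped
// through the block table; positions past the context length stay untouched.
void qk_paged(const float* q,         // [S]
              const float* k_cache,   // [NUM_SLOTS, S]
              const int* block_table, // [max_kv_len]
              int context_len, size_t max_kv_len, size_t S,
              float* attn_w) {        // [max_kv_len]
    for (size_t pk = 0; pk < max_kv_len; ++pk) {
        if (static_cast<int>(pk) >= context_len)
            continue;
        const int slot = block_table[pk];
        attn_w[pk] = dot(q, k_cache + static_cast<size_t>(slot) * S, S);
    }
}
```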
&causal_mask.at({b, h, pq, 0}, true) : nullptr; attn_softmax_kernel(buf_attn_w.ptr(b, h, pq), buf_attn_w.ptr(b, h, pq), @@ -698,7 +727,7 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, cmask_ptr, select_nfltmax_at_0, ncausal, - kv_len, + cur_kv_len, attn_mask_prec, ov::element::f32); }); @@ -715,35 +744,58 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, size_t b, h_group, pv; if (start < end) { parallel_it_init(start, b, B, h_group, h_group_num, pv, kv_len); - if (q_len == 1 && h_each_group_len == 1) { + if (is_pagedattn) { for (size_t iwork = start; iwork < end; ++iwork) { - auto b_kv = beams ? beams.ptr(b)[pv] : b; - auto* v = present_value.ptr(b_kv, h_group, pv); - auto p = past_v_scale_zp.ptr(b_kv, h_group, pv); - attn_acc_value(buf_attn_score.ptr(ithr, b, 0, h_group), - buf_attn_w.ptr(b, h_group, 0, pv)[0], - v, - S, - p + 0, - p + 1); + auto context_len = static_cast(context_lens.ptr()[b]); + // kv_len must be valid + if (pv < context_len) { + auto block_idx = beams.ptr(b)[pv]; + OPENVINO_ASSERT(block_idx >= 0, "block idx in vcache must be greater or equal than 0"); + auto* v = present_value.ptr(block_idx, h_group); + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + attn_acc_value(buf_attn_score.ptr(ithr, b, pq, h), + buf_attn_w.ptr(b, h, pq)[pv], + v, + S, + nullptr, + nullptr); + } + } + } parallel_it_step(b, B, h_group, h_group_num, pv, kv_len); } } else { - for (size_t iwork = start; iwork < end; ++iwork) { - auto b_kv = beams ? beams.ptr(b)[pv] : b; - auto* v = present_value.ptr(b_kv, h_group, pv); - auto p = past_v_scale_zp.ptr(b_kv, h_group, pv); - for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { - attn_acc_value(buf_attn_score.ptr(ithr, b, pq, h), - buf_attn_w.ptr(b, h, pq)[pv], - v, - S, - p + 0, - p + 1); + if (q_len == 1 && h_each_group_len == 1) { + for (size_t iwork = start; iwork < end; ++iwork) { + auto b_kv = beams ? beams.ptr(b)[pv] : b; + auto* v = present_value.ptr(b_kv, h_group, pv); + auto p = past_v_scale_zp.ptr(b_kv, h_group, pv); + attn_acc_value(buf_attn_score.ptr(ithr, b, 0, h_group), + buf_attn_w.ptr(b, h_group, 0, pv)[0], + v, + S, + p + 0, + p + 1); + parallel_it_step(b, B, h_group, h_group_num, pv, kv_len); + } + } else { + for (size_t iwork = start; iwork < end; ++iwork) { + auto b_kv = beams ? 
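The effective-length logic above reduces to one rule per path; a small sketch, assuming context_lens holds per-batch lengths for the paged path:

```cpp
#include <cstddef>

// Paged attention takes the per-batch context length verbatim; the dense path
// derives the causal prefix for query row pq from kv_len and q_len.
size_t effective_ncausal(bool paged, bool auto_causal,
                         size_t kv_len, size_t q_len, size_t pq,
                         int context_len_b) {
    if (paged)
        return static_cast<size_t>(context_len_b); // cur_kv_len == ncausal
    return auto_causal ? (kv_len - q_len + pq + 1) : kv_len;
}
```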
beams.ptr(b)[pv] : b; + auto* v = present_value.ptr(b_kv, h_group, pv); + auto p = past_v_scale_zp.ptr(b_kv, h_group, pv); + for (size_t pq = 0; pq < q_len; pq++) { + for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { + attn_acc_value(buf_attn_score.ptr(ithr, b, pq, h), + buf_attn_w.ptr(b, h, pq)[pv], + v, + S, + p + 0, + p + 1); + } } + parallel_it_step(b, B, h_group, h_group_num, pv, kv_len); } - parallel_it_step(b, B, h_group, h_group_num, pv, kv_len); } } } @@ -763,6 +815,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, const ov::intel_cpu::PlainTensor& alibi_mask, const ov::intel_cpu::PlainTensor& attention_mask, const ov::intel_cpu::PlainTensor& beams, + const ov::intel_cpu::PlainTensor& context_lens, ov::intel_cpu::PlainTensor& output_emb, ov::intel_cpu::PlainTensor& buf_attn_w, ov::intel_cpu::PlainTensor& buf_attn_score, @@ -780,6 +833,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, alibi_mask, attention_mask, beams, + context_lens, output_emb, buf_attn_w, buf_attn_score, @@ -796,6 +850,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, alibi_mask, attention_mask, beams, + context_lens, output_emb, buf_attn_w, buf_attn_score, @@ -814,6 +869,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, alibi_mask, attention_mask, beams, + context_lens, output_emb, buf_attn_w, buf_attn_score, @@ -830,6 +886,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, alibi_mask, attention_mask, beams, + context_lens, output_emb, buf_attn_w, buf_attn_score, @@ -846,6 +903,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, alibi_mask, attention_mask, beams, + context_lens, output_emb, buf_attn_w, buf_attn_score, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.hpp index e29e2bae0aa07a..07edc33d914a69 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.hpp @@ -21,6 +21,7 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, const ov::intel_cpu::PlainTensor& alibi_mask, const ov::intel_cpu::PlainTensor& attention_mask, const ov::intel_cpu::PlainTensor& beams, + const ov::intel_cpu::PlainTensor& context_lens, ov::intel_cpu::PlainTensor& output_emb, ov::intel_cpu::PlainTensor& buf_attn_w, ov::intel_cpu::PlainTensor& buf_attn_score, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp index 6aea1119788e92..a1aeaaecf9ae17 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp @@ -45,8 +45,9 @@ BrgemmKernel::BrgemmKernel(size_t M, THROW_ERROR("brgemm bf16 kernel could only be used above avx512_bf16"); bool isAMXSupported = is_bf16 && mayiuse(avx512_core_amx); + size_t vlen = cpu_isa_traits::vlen; // blocking N - N_blk = is_bf16 ? 32 : N; + N_blk = is_bf16 ? 
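The N-blocking expression that resumes just below pads N up to at least one full vector register. A sketch of the same computation, assuming the 64-byte AVX-512 vector length reported by cpu_isa_traits:

```cpp
#include <algorithm>
#include <cstddef>

// One AVX-512 register is 64 bytes, i.e. 64 / sizeof(T) elements, so the
// non-bf16 N block is padded up to at least one full vector.
constexpr size_t kAvx512VlenBytes = 64; // assumed cpu_isa_traits<avx512_core>::vlen

template <typename T>
size_t n_block(size_t N, bool is_bf16) {
    return is_bf16 ? 32 : std::max(N, kAvx512VlenBytes / sizeof(T));
}
```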
32 : std::max(N, vlen / inType.size()); N_tail = N % N_blk; // blocking K @@ -55,7 +56,6 @@ BrgemmKernel::BrgemmKernel(size_t M, if (isAMXSupported && K_tail) { K_tail = rnd_up(K_tail, 2); } - size_t vlen = cpu_isa_traits::vlen; // copied K must be round up by vlen / inType.size(), otherwise copy B kernel may access wrong memory packedBSize = rnd_up(K, vlen / inType.size()) * rnd_up(N, N_blk) * inType.size(); size_t brg0BaseIdx = std::numeric_limits::max(); diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index ffe1f6b39cf412..a5ca0425c50bc7 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -125,7 +125,8 @@ struct MHAKernel { PlainTensor& output_emb, bool has_out_transpose, bool auto_causal, - float d_scale = 0.0f) { + float d_scale = 0.0f, + size_t sliding_window = 0) { auto B = query.size(0); auto H = query.size(1); auto q_len = query.size(2); @@ -177,7 +178,18 @@ struct MHAKernel { } // softmax - softmax(&attn_score[0], ncausal); + if (sliding_window) { + size_t start_idx = 0; + auto new_causal = ncausal; + if (ncausal > sliding_window) { + start_idx = ncausal - static_cast(sliding_window); + new_causal = sliding_window; + } + softmax(&attn_score[start_idx], new_causal); + memset(&attn_score[0], 0, sizeof(float) * start_idx); + } else { + softmax(&attn_score[0], ncausal); + } // linearly combine value word_vec.assign(head_size, 0.0f); @@ -292,7 +304,10 @@ struct MHAKernel { qk_gemm_ptr = qk_result.first; dnnl::memory::desc attn_md(make_dnnl_dims({B, H, q_len, kv_len}), dt::f32, tag::abcd); weight_md = dnnl::memory::desc(make_dnnl_dims({B, H, q_len, kv_len}), qkv_dt, tag::abcd); - out_md = dnnl::memory::desc(make_dnnl_dims({B, H, q_len, head_size}), qkv_dt, tag::abcd); + if (has_out_transpose) + out_md = dnnl::memory::desc(make_dnnl_dims({B, q_len, H, head_size}), qkv_dt, tag::abcd); + else + out_md = dnnl::memory::desc(make_dnnl_dims({B, H, q_len, head_size}), qkv_dt, tag::abcd); size_t ldc_index = 2; if (has_out_transpose) { @@ -347,7 +362,8 @@ struct MHAKernel { PlainTensor& output_emb, bool has_out_transpose, bool auto_causal, - float d_scale = 0.0f) { + float d_scale = 0.0f, + size_t sliding_window = 0) { const auto B = query.size(0); const auto H = query.size(1); const auto q_len = query.size(2); @@ -410,22 +426,47 @@ struct MHAKernel { for (size_t m = m_start; m < m_end; m++) { // apply attention mask & sofmax auto ncausal = auto_causal ? 
(kv_len - q_len + m + 1) : kv_len; - attn_softmax(&score.at({b, h, m, 0}), - &weight.at({b, h, m, 0}), - d_scale, - alibi_ptr + m * alibi_stride, - attn_mask_ptr + m * attn_mask_stride, - cmask_ptr + m * cmask_stride, - select_nfltmax_at_0, - ncausal, - kv_len, - precision_of::value, - precision_of::value); + if (sliding_window) { + size_t start_idx = 0; + auto new_causal = ncausal; + if (ncausal > sliding_window) { + start_idx = ncausal - static_cast(sliding_window); + new_causal = sliding_window; + } + attn_softmax(&score.at({b, h, m, start_idx}), + &weight.at({b, h, m, start_idx}), + d_scale, + alibi_ptr + m * alibi_stride, + attn_mask_ptr + m * attn_mask_stride, + cmask_ptr + m * cmask_stride, + select_nfltmax_at_0, + new_causal, + kv_len - start_idx, + precision_of::value, + precision_of::value); + + memset(&weight.at({b, h, m, 0}), 0, sizeof(T) * start_idx); + } else { + attn_softmax(&score.at({b, h, m, 0}), + &weight.at({b, h, m, 0}), + d_scale, + alibi_ptr + m * alibi_stride, + attn_mask_ptr + m * attn_mask_stride, + cmask_ptr + m * cmask_stride, + select_nfltmax_at_0, + ncausal, + kv_len, + precision_of::value, + precision_of::value); + } } T* w_ptr = &weight.at({b, h, m_start, 0}); - PlainTensor& sdpa_out = is_bf16 ? fp32_out : output_emb; - float* fp32_out_ptr = - has_out_transpose ? &sdpa_out.at({b, m_start, h, 0}) : &sdpa_out.at({b, h, m_start, 0}); + float* fp32_out_ptr; + if (is_bf16) { + fp32_out_ptr = has_out_transpose ? &fp32_out.at({b, m_start, h, 0}) : &fp32_out.at({b, h, m_start, 0}); + } else { + fp32_out_ptr = has_out_transpose ? &output_emb.at({b, m_start, h * head_size}) : &output_emb.at({b, h, m_start, 0}); + } T* v_ptr = is_bf16 ? &wv_scratch_b.at({b, h / h_each_group_len, 0}) : &present_value.at({b, h / h_each_group_len, 0, 0}); wv_gemm_ptr->executeGemm(m_cnt < m_block_size, @@ -435,11 +476,21 @@ struct MHAKernel { wsp.data() + tid * wsp_size_per_thread, wv_scratch_a ? &wv_scratch_a.at({tid, 0}) : nullptr); if (is_bf16) { - cpu_convert(&fp32_out.at({b, h, m_start, 0}), - &output_emb.at({b, h, m_start, 0}), - ov::element::f32, - ov::element::bf16, - m_cnt * head_size); + if (has_out_transpose) { + for (size_t m = m_start; m < m_end; m++) { + cpu_convert(&fp32_out.at({b, m, h, 0}), + &output_emb.at({b, m, h * head_size}), + ov::element::f32, + ov::element::bf16, + head_size); + } + } else { + cpu_convert(&fp32_out.at({b, h, m_start, 0}), + &output_emb.at({b, h, m_start, 0}), + ov::element::f32, + ov::element::bf16, + m_cnt * head_size); + } } }); } @@ -467,7 +518,8 @@ struct MHAKernel { PlainTensor& output_emb, bool has_out_transpose, bool auto_causal, - float d_scale = 0.0f) { + float d_scale = 0.0f, + size_t sliding_window = 0) { auto head_size = query.size(3); if (d_scale == 0.0f) d_scale = 1.0f / sqrt(head_size); @@ -481,7 +533,8 @@ struct MHAKernel { output_emb, has_out_transpose, auto_causal, - d_scale); + d_scale, + sliding_window); } }; @@ -523,7 +576,8 @@ struct MHAKernel { PlainTensor& output_emb, bool has_out_transpose, bool auto_causal, - float d_scale = 0.0f) { + float d_scale = 0.0f, + size_t sliding_window = 0) { auto B = query.size(0); auto H = query.size(1); auto q_len = query.size(2); @@ -613,17 +667,39 @@ struct MHAKernel { for (size_t m = m_start; m < m_end; m++) { // apply attention mask & sofmax auto ncausal = auto_causal ? 
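Both sliding-window branches above implement the same idea: run the softmax only over the last `window` of the causal scores and zero everything older so it cannot contribute to the V accumulation. A self-contained float sketch:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstring>

// Softmax restricted to the last `window` of the first `ncausal` scores;
// the prefix before the window is zeroed afterwards.
void window_softmax(float* score, size_t ncausal, size_t window) {
    const size_t start = (window && ncausal > window) ? ncausal - window : 0;
    float max_v = score[start];
    for (size_t i = start; i < ncausal; ++i)
        max_v = std::max(max_v, score[i]);
    float sum = 0.0f;
    for (size_t i = start; i < ncausal; ++i) {
        score[i] = std::exp(score[i] - max_v);
        sum += score[i];
    }
    for (size_t i = start; i < ncausal; ++i)
        score[i] /= sum;
    std::memset(score, 0, sizeof(float) * start); // out-of-window weights contribute nothing
}
```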
(kv_len - q_len + m + 1) : kv_len; - attn_softmax(qk + (m - m_start) * qk_m_stride, - qk + (m - m_start) * qk_m_stride, - d_scale, - alibi_ptr + m * alibi_stride, - attn_mask_ptr + m * attn_mask_stride, - cmask_ptr + m * cmask_stride, - select_nfltmax_at_0, - ncausal, - kv_len, - ov::element::f32, - ov::element::f32); + if (sliding_window) { + size_t start_idx = 0; + auto new_causal = ncausal; + if (ncausal > sliding_window) { + start_idx = ncausal - static_cast(sliding_window); + new_causal = sliding_window; + } + attn_softmax(qk + (m - m_start) * qk_m_stride + start_idx, + qk + (m - m_start) * qk_m_stride + start_idx, + d_scale, + alibi_ptr + m * alibi_stride, + attn_mask_ptr + m * attn_mask_stride, + cmask_ptr + m * cmask_stride, + select_nfltmax_at_0, + new_causal, + kv_len - start_idx, + ov::element::f32, + ov::element::f32); + + memset(qk + (m - m_start) * qk_m_stride, 0, sizeof(float) * start_idx); + } else { + attn_softmax(qk + (m - m_start) * qk_m_stride, + qk + (m - m_start) * qk_m_stride, + d_scale, + alibi_ptr + m * alibi_stride, + attn_mask_ptr + m * attn_mask_stride, + cmask_ptr + m * cmask_stride, + select_nfltmax_at_0, + ncausal, + kv_len, + ov::element::f32, + ov::element::f32); + } } mlas_sgemm("N", "N", @@ -666,12 +742,13 @@ struct MHASingleToken { const PlainTensor& attention_mask, PlainTensor& output_emb, const PlainTensor& beams, + const PlainTensor& context_lens, bool has_out_transpose, bool auto_causal, float d_scale, const PlainTensor& k_scale_zp, const PlainTensor& v_scale_zp) { - mha_single_token(query, present_key, present_value, alibi_mask, attention_mask, beams, output_emb, + mha_single_token(query, present_key, present_value, alibi_mask, attention_mask, beams, context_lens, output_emb, m_attn_w, m_temp, has_out_transpose, auto_causal, d_scale, k_scale_zp, v_scale_zp, m_head_sum); } }; @@ -700,66 +777,108 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt bool fuse_causal_attn = config.config.fuse_causal_attn; bool is_causal = config.config.is_causal; bool fuse_concat = config.config.fuse_concat; + bool is_pagedattn = config.is_pageattn; auto input_num = inputs.size(); + bool is_prompt = false; PlainTensor present_key, present_value; PlainTensor q_input; // f32[B, H, L1, S] PlainTensor k_input; // f32[B, H|1, L1, S] / [B, H|1, L0+L1, S] PlainTensor v_input; // f32[B, H|1, L1, S] / [B, H|1, L0+L1, S] PlainTensor beam_table; // i32[B, max_kvLen] + PlainTensor context_lens; + PlainTensor attn_mask; + PlainTensor output_emb(output); float scale_input = 0.0f; size_t B, L1, L0, S; + size_t sliding_window = 0; q_input.reset(inputs[0]); k_input.reset(inputs[1]); v_input.reset(inputs[2]); present_key.reset(presentk_input); present_value.reset(presentv_input); - if (beam_input) - beam_table.reset(beam_input); - PlainTensor attn_mask; - if (input_num > 3) { - // attn_mask - if (inputs[3]->getDesc().getPrecision() == ov::element::u8) { - // bool->f32 - prepare_attn_mask(inputs[3]); - attn_mask = attn_buf; + if (is_pagedattn) { + is_prompt = *inputs[ID_IS_PROMPT]->getDataAs() == 1; + //auto max_context_len = static_cast(*inputs[ID_MAX_CONTEXT_LEN]->getDataAs()); + context_lens.reset(inputs[ID_CONTEXT_LENS]); + beam_table.reset(inputs[ID_BLOCK_TABLES]); + scale_input = *inputs[ID_SCALE]->getDataAs(); + // TODO: alibi and sliding window + // no attn mask, auto-generated causal mask + is_causal = true; + has_out_transpose = true; + + // q: [B, L1, H*S], kv: [B, L1, Hk*S] + // k_cache: [NUM_BLOCKS, Hk, S / 4, BLOCK_SIZE, 4] + // v_cache: 
[NUM_BLOCKS, Hk, S, BLOCK_SIZE] + // context_lens: [B] + // block_tables: [B, max_block_per_request] + B = k_input.size(0); + L1 = k_input.size(1); + auto Hk = present_key.size(1); + S = present_value.size(2); + auto H = q_input.size(2) / S; + // L0 in each batch may be different + L0 = 0; + + q_input.assert_dims({B, L1, H * S}); + if (!is_prompt) { + context_lens.assert_dims({B}); + beam_table.assert_dims({B, 0}, true); } else { - attn_mask.reset(inputs[3]); + sliding_window = static_cast(*inputs[ID_SLIDING_WINDOW]->getDataAs()); } - // if has scale, attn_mask must be present - if (input_num > 4) { - scale_input = *inputs[4]->getDataAs(); + output_emb.assert_dims({B, L1, H * S}); + q_input = q_input.reshape({B, L1, H, S}).permute({0, 2, 1, 3}); + k_input = k_input.reshape({B, L1, Hk, S}).permute({0, 2, 1, 3}); + v_input = v_input.reshape({B, L1, Hk, S}).permute({0, 2, 1, 3}); + present_key = present_key.reshape({present_key.size(0), Hk, S}); + present_value = present_value.reshape({present_value.size(0), Hk, S}); + } else { + if (beam_input) + beam_table.reset(beam_input); + if (input_num > 3) { + // attn_mask + if (inputs[3]->getDesc().getPrecision() == ov::element::u8) { + // bool->f32 + prepare_attn_mask(inputs[3]); + attn_mask = attn_buf; + } else { + attn_mask.reset(inputs[3]); + } + // if has scale, attn_mask must be present + if (input_num > 4) { + scale_input = *inputs[4]->getDataAs(); + } } - } - // q: [B, H, L1, S] - const auto & permute_axes = config.config.permute_axes; - if (!permute_axes.empty()) { - q_input = q_input.permute(permute_axes); - k_input = k_input.permute(permute_axes); - v_input = v_input.permute(permute_axes); - present_key = present_key.permute(permute_axes); - present_value = present_value.permute(permute_axes); - } - B = q_input.size(0); - L1 = q_input.size(2); - S = q_input.size(3); - L0 = present_key.size(2) - L1; - auto Hk = k_input.size(1); - - if (fuse_concat) { - k_input.assert_dims({B, Hk, L1, S}); - v_input.assert_dims({B, Hk, L1, S}); - } else { - k_input.assert_dims({B, Hk, L0 + L1, S}); - v_input.assert_dims({B, Hk, L0 + L1, S}); + // q: [B, H, L1, S] + const auto & permute_axes = config.config.permute_axes; + if (!permute_axes.empty()) { + q_input = q_input.permute(permute_axes); + k_input = k_input.permute(permute_axes); + v_input = v_input.permute(permute_axes); + present_key = present_key.permute(permute_axes); + present_value = present_value.permute(permute_axes); + } + B = q_input.size(0); + L1 = q_input.size(2); + S = q_input.size(3); + L0 = present_key.size(2) - L1; + auto Hk = k_input.size(1); + + if (fuse_concat) { + k_input.assert_dims({B, Hk, L1, S}); + v_input.assert_dims({B, Hk, L1, S}); + } else { + k_input.assert_dims({B, Hk, L0 + L1, S}); + v_input.assert_dims({B, Hk, L0 + L1, S}); + } + present_key.assert_dims({B, Hk, L0 + L1, S}); + present_value.assert_dims({B, Hk, L0 + L1, S}); + if (beam_table) + beam_table.assert_dims({B, L0 + L1}); } - present_key.assert_dims({B, Hk, L0 + L1, S}); - present_value.assert_dims({B, Hk, L0 + L1, S}); - if (beam_table) - beam_table.assert_dims({B, L0 + L1}); - - ov::intel_cpu::PlainTensor output_emb(output); bool auto_causal; bool use_attn_mask; @@ -791,11 +910,15 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt } // second token, or first token with pastkv fusing - bool use_one_token = L1 == 1 || (fuse_concat && L0 > 0); + bool use_one_token; + if (is_pagedattn) + use_one_token = !is_prompt; + else + use_one_token = L1 == 1 || (fuse_concat && L0 > 0); if 
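The reshape+permute above is a zero-copy relayout: only strides change, so element (b, h, l, s) of the permuted [B, H, L1, S] view still resolves into the original [B, L1, H*S] buffer. A sketch of that index math:

```cpp
#include <cstddef>

// Flat offset of element (b, h, l, s) of the permuted view inside the
// untouched [B, L1, H*S] input buffer.
size_t q_offset(size_t b, size_t h, size_t l, size_t s,
                size_t L1, size_t H, size_t S) {
    return ((b * L1 + l) * H + h) * S + s;
}
```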
(!use_one_token) { // multi-token version kernel(strm, q_input, k_input, v_input, {}, use_attn_mask ? attn_mask : PlainTensor(), - output_emb, has_out_transpose, auto_causal, scale_input); + output_emb, has_out_transpose, auto_causal, scale_input, sliding_window); } else { // 1-token version // for second token, using a special AVX2/AVX512 float path: @@ -803,7 +926,7 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt // 2, using float will save the repack cost which typically is required for bf16/int8 opt // 3, using dot product can leverage the SIMD while easily adapt to indirect kv cache kernel_single_token(q_input, present_key, present_value, {}, use_attn_mask ? attn_mask : PlainTensor(), - output_emb, beam_table, has_out_transpose, auto_causal, scale_input, k_scale_zp, v_scale_zp); + output_emb, beam_table, context_lens, has_out_transpose, auto_causal, scale_input, k_scale_zp, v_scale_zp); } } }; @@ -815,12 +938,18 @@ ScaledDotProductAttention::ScaledDotProductAttention(const std::shared_ptr(op); - if (node) { - m_config.config.is_causal = node->get_causal(); + if (op->get_type_name() == std::string("PagedAttentionExtension")) { + m_is_pageattn = true; + m_config.is_pageattn = true; } else { - const auto node = std::dynamic_pointer_cast(op); - m_config.config = node->get_config(); + m_is_pageattn = false; + const auto node = std::dynamic_pointer_cast(op); + if (node) { + m_config.config.is_causal = node->get_causal(); + } else { + const auto node = std::dynamic_pointer_cast(op); + m_config.config = node->get_config(); + } } } @@ -840,49 +969,83 @@ void ScaledDotProductAttention::initSupportedPrimitiveDescriptors() { rtPrecision, getInputShapeAtPort(1))); config.inConfs[2].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( rtPrecision, getInputShapeAtPort(2))); - auto nextPortIdx = 3; - if (orginSDPInputNumber > 3) { - // attn_mask - if (getOriginalInputPrecisionAtPort(nextPortIdx) == ov::element::u8) { - config.inConfs[nextPortIdx].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - ov::element::u8, getInputShapeAtPort(nextPortIdx))); - } else { + if (m_is_pageattn) { + OPENVINO_ASSERT(getOriginalInputsNumber() == 13, "The input number of PagedAttention should be 13."); + // kvcache, float, [] + auto past_kv_input_mem_precision = getOriginalInputPrecisionAtPort(ID_KCACHE); + config.inConfs[ID_KCACHE].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + past_kv_input_mem_precision, getInputShapeAtPort(ID_KCACHE))); + config.inConfs[ID_VCACHE].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + past_kv_input_mem_precision, getInputShapeAtPort(ID_VCACHE))); + // is_prompt, bool, [] + config.inConfs[ID_IS_PROMPT].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::u8, getInputShapeAtPort(ID_IS_PROMPT))); + // slot_mapping, int, [batch_size, max_context_len] + config.inConfs[ID_SLOT_MAPPING].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::i32, getInputShapeAtPort(ID_SLOT_MAPPING))); + // max_context_len, int, [] + config.inConfs[ID_MAX_CONTEXT_LEN].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::i32, getInputShapeAtPort(ID_MAX_CONTEXT_LEN))); + // context_lens, int, [batch_size] + config.inConfs[ID_CONTEXT_LENS].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::i32, getInputShapeAtPort(ID_CONTEXT_LENS))); + // block_tables, int, [batch_size, max_block_per_request] + 
config.inConfs[ID_BLOCK_TABLES].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::i32, getInputShapeAtPort(ID_BLOCK_TABLES))); + // scale, float, [] + config.inConfs[ID_SCALE].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::f32, getInputShapeAtPort(ID_SCALE))); + // alibi_slopes, float, [?] or nullptr + config.inConfs[ID_ALIBI_SLOPES].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::f32, getInputShapeAtPort(ID_ALIBI_SLOPES))); + // sliding_window, int, [] + config.inConfs[ID_SLIDING_WINDOW].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::i32, getInputShapeAtPort(ID_SLIDING_WINDOW))); + } else { + auto nextPortIdx = 3; + if (orginSDPInputNumber > 3) { + // attn_mask + if (getOriginalInputPrecisionAtPort(nextPortIdx) == ov::element::u8) { + config.inConfs[nextPortIdx].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::u8, getInputShapeAtPort(nextPortIdx))); + } else { + config.inConfs[nextPortIdx].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + rtPrecision, getInputShapeAtPort(nextPortIdx))); + } + nextPortIdx++; + } + if (orginSDPInputNumber > 4) { config.inConfs[nextPortIdx].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - rtPrecision, getInputShapeAtPort(nextPortIdx))); + ov::element::f32, getInputShapeAtPort(nextPortIdx))); } - nextPortIdx++; - } - if (orginSDPInputNumber > 4) { - config.inConfs[nextPortIdx].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - ov::element::f32, getInputShapeAtPort(nextPortIdx))); - } - if (m_config.config.fuse_concat) { - // beam_idx - config.inConfs[orginSDPInputNumber + 0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - ov::element::i32, getInputShapeAtPort(orginSDPInputNumber + 0))); - - // Since the InputMemory nodes are simple proxy for the state memory as well as the init subgraph memory, - // it doesn't make sense to set the real KV cache precision, since we don't need any precision conversions - // provided by the common graph logic. We set precisions equal to the precisions of the state nodes to avoid - // reorder insertion in between MemoryInputSDPA and SDPA nodes. 
- - auto past_k_input_mem_precision = getParentEdgeAt(orginSDPInputNumber + 1)->getParent()->getOriginalOutputPrecisionAtPort(0); - // pastk - config.inConfs[orginSDPInputNumber + 1].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - past_k_input_mem_precision, getInputShapeAtPort(orginSDPInputNumber + 1))); - - auto past_v_input_mem_precision = getParentEdgeAt(orginSDPInputNumber + 2)->getParent()->getOriginalOutputPrecisionAtPort(0); - // pastv - config.inConfs[orginSDPInputNumber + 2].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - past_v_input_mem_precision, getInputShapeAtPort(orginSDPInputNumber + 2))); - - config.outConfs[1].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - past_k_input_mem_precision, getOutputShapeAtPort(1))); - config.outConfs[1].inPlace(-1); - config.outConfs[2].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( - past_v_input_mem_precision, getOutputShapeAtPort(2))); - config.outConfs[2].inPlace(-1); + if (m_config.config.fuse_concat) { + // beam_idx + config.inConfs[orginSDPInputNumber + 0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + ov::element::i32, getInputShapeAtPort(orginSDPInputNumber + 0))); + + // Since the InputMemory nodes are simple proxy for the state memory as well as the init subgraph memory, + // it doesn't make sense to set the real KV cache precision, since we don't need any precision conversions + // provided by the common graph logic. We set precisions equal to the precisions of the state nodes to avoid + // reorder insertion in between MemoryInputSDPA and SDPA nodes. + + auto past_k_input_mem_precision = getParentEdgeAt(orginSDPInputNumber + 1)->getParent()->getOriginalOutputPrecisionAtPort(0); + // pastk + config.inConfs[orginSDPInputNumber + 1].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + past_k_input_mem_precision, getInputShapeAtPort(orginSDPInputNumber + 1))); + + auto past_v_input_mem_precision = getParentEdgeAt(orginSDPInputNumber + 2)->getParent()->getOriginalOutputPrecisionAtPort(0); + // pastv + config.inConfs[orginSDPInputNumber + 2].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + past_v_input_mem_precision, getInputShapeAtPort(orginSDPInputNumber + 2))); + + config.outConfs[1].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + past_k_input_mem_precision, getOutputShapeAtPort(1))); + config.outConfs[1].inPlace(-1); + config.outConfs[2].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( + past_v_input_mem_precision, getOutputShapeAtPort(2))); + config.outConfs[2].inPlace(-1); + } } config.outConfs[0].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc( @@ -941,24 +1104,34 @@ void ScaledDotProductAttention::execute(dnnl::stream strm) { } PlainTensor k_scale_zp, v_scale_zp; - if (m_config.config.fuse_concat) { - // initialization will be also completed in this func - gatherConcatPastkv(inputs[1], inputs[2], getSrcMemoryAtPort(orginSDPInputNumber)); - - presentk_input = m_k_state->internal_state_mem(); - presentv_input = m_v_state->internal_state_mem(); - beam_input = m_k_state->hidden_state_mem(); - k_scale_zp = m_k_state->get_scale_zp(); - v_scale_zp = m_v_state->get_scale_zp(); + if (m_is_pageattn) { + gatherConcatPastkvForPagedAttn(inputs); + + presentk_input = inputs[ID_KCACHE]; + presentv_input = inputs[ID_VCACHE]; } else { - presentk_input = inputs[1]; - presentv_input = inputs[2]; + if (m_config.config.fuse_concat) { + // initialization will be also completed in this func + 
gatherConcatPastkv(inputs[1], inputs[2], getSrcMemoryAtPort(orginSDPInputNumber)); + + presentk_input = m_k_state->internal_state_mem(); + presentv_input = m_v_state->internal_state_mem(); + beam_input = m_k_state->hidden_state_mem(); + k_scale_zp = m_k_state->get_scale_zp(); + v_scale_zp = m_v_state->get_scale_zp(); + } else { + presentk_input = inputs[1]; + presentv_input = inputs[2]; + } } m_executor->execute(strm, m_config, inputs, output, presentk_input, presentv_input, beam_input, k_scale_zp, v_scale_zp); } bool ScaledDotProductAttention::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (op->get_type_name() == std::string("PagedAttentionExtension")) { + return true; + } if (!std::dynamic_pointer_cast(op) && !std::dynamic_pointer_cast(op)) { errorMessage = "Only ScaledDotProductAttention or ScaledDotProductAttentionWithKVCache operation are supported"; @@ -1161,6 +1334,33 @@ void ScaledDotProductAttention::resetBeamTablePastkv(const MemoryPtr& mem_cur_k, } } +void ScaledDotProductAttention::gatherConcatPastkvForPagedAttn(const std::vector& inputs) { + PlainTensor k, v, k_cache, v_cache, slot_mapping; + + k.reset(inputs[ID_K]); // [B, L1, H * S] + v.reset(inputs[ID_V]); + k_cache.reset(inputs[ID_KCACHE]); // [NUM_BLOCKS, H, S / 4, BLOCK_SIZE, 4] + v_cache.reset(inputs[ID_VCACHE]); // [NUM_BLOCKS, H, S, BLOCK_SIZE] + slot_mapping.reset(inputs[ID_SLOT_MAPPING]); // [B, max_context_len] + + auto B = k.size(0); + auto L1 = k.size(1); + auto H = k_cache.size(1); + auto S = v_cache.size(2); + + k.assert_dims({B, L1, H * S}); + v.assert_dims({B, L1, H * S}); + k_cache.assert_dims({0, H, 0, 1, 0}, true); + v_cache.assert_dims({0, H, S, 1}, true); + slot_mapping.assert_dims({B, 0}, true); + k = k.reshape({B, L1, H, S}).permute({0, 2, 1, 3}); + v = v.reshape({B, L1, H, S}).permute({0, 2, 1, 3}); + k_cache = k_cache.reshape({k_cache.size(0), H, S}); + v_cache = v_cache.reshape({v_cache.size(0), H, S}); + paged_attn_memcpy(k, v, k_cache, v_cache, slot_mapping); + // TODO: add u8 kvcache support +} + void ScaledDotProductAttention::gatherConcatPastkv(const MemoryPtr& mem_cur_k, const MemoryPtr& mem_cur_v, const MemoryPtr& mem_beam_idx) { PlainTensor cur_k; cur_k.reset(mem_cur_k); diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.h b/src/plugins/intel_cpu/src/nodes/scaled_attn.h index d4ae2df8c7688a..38980d07e131e0 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.h +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.h @@ -48,6 +48,7 @@ class ScaledDotProductAttention : public Node { private: void gatherConcatPastkv(const MemoryPtr& mem_cur_k, const MemoryPtr& mem_cur_v, const MemoryPtr& mem_beam_idx); + void gatherConcatPastkvForPagedAttn(const std::vector& inputs); void updateBeamTable(const MemoryPtr& mem_beam_idx, size_t new_q_len); void updatePastkv(const MemoryPtr& mem_cur_k, const MemoryPtr& mem_cur_v); ov::element::Type getRuntimePrecision() const override; @@ -55,6 +56,7 @@ class ScaledDotProductAttention : public Node { struct Config { ScaledDotProductAttentionWithKVCache::Config config; + bool is_pageattn = false; }; struct Executor { @@ -63,6 +65,7 @@ class ScaledDotProductAttention : public Node { const PlainTensor& k_scale_zp, const PlainTensor& v_scale_zp) = 0; }; + bool m_is_pageattn; Config m_config; std::shared_ptr m_executor; template struct AttentionExecutor; @@ -70,6 +73,21 @@ class ScaledDotProductAttention : public Node { std::shared_ptr m_k_state; std::shared_ptr m_v_state; + + // PagedAttention input index + static 
const size_t ID_Q = 0; + static const size_t ID_K = 1; + static const size_t ID_V = 2; + static const size_t ID_KCACHE = 3; + static const size_t ID_VCACHE = 4; + static const size_t ID_IS_PROMPT = 5; + static const size_t ID_SLOT_MAPPING = 6; + static const size_t ID_MAX_CONTEXT_LEN = 7; + static const size_t ID_CONTEXT_LENS = 8; + static const size_t ID_BLOCK_TABLES = 9; + static const size_t ID_SCALE = 10; + static const size_t ID_ALIBI_SLOPES = 11; + static const size_t ID_SLIDING_WINDOW = 12; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 0f78f3c6e22f9d..e8d502fc0d3922 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -409,6 +409,9 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) } else if (name == ov::hint::scheduling_core_type) { const auto core_type = engConfig.schedulingCoreType; return core_type; + } else if (name == ov::hint::model_distribution_policy) { + const auto distribution_policy = engConfig.modelDistributionPolicy; + return distribution_policy; } else if (name == ov::hint::enable_hyper_threading) { const bool ht_value = engConfig.enableHyperThreading; return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value); @@ -481,6 +484,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), + RW_property(ov::hint::model_distribution_policy.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp index e00e5cae8aae74..ae7d6e0dd1efd6 100644 --- a/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp @@ -53,7 +53,26 @@ class SDPAShapeInfer : public ShapeInferEmptyPads { ScaledDotProductAttentionWithKVCache::Config m_config; }; +class PAShapeInfer : public ShapeInferEmptyPads { +public: + PAShapeInfer() {} + + IShapeInfer::Result infer(const std::vector>& input_shapes, + const std::unordered_map& data_dependency) override { + const auto& query_dims = input_shapes.front().get(); + + return {{query_dims}, ShapeInferStatus::success}; + } + + port_mask_t get_port_mask() const override { + return EMPTY_PORT_MASK; + } +}; + ShapeInferPtr SDPAShapeInferFactory::makeShapeInfer() const { + if (m_op->get_type_name() == std::string("PagedAttentionExtension")) { + return std::make_shared(); + } if (auto sdpa = std::dynamic_pointer_cast(m_op)) { const auto& config = sdpa->get_config(); if (config.output_BLHxS == false) diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rope_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rope_fusion.cpp index 839ccc1267d226..200659f5f2e322 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rope_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/rope_fusion.cpp @@ -387,8 +387,11 @@ ov::intel_cpu::RoPEFusionGPTJ::RoPEFusionGPTJ() { auto varsplit = makePattern({gather_sin_cos, -1, {ndims / 2, -1}}); varsplit->set_output_size(2); - auto unsqueeze_sin = makePattern({varsplit->output(0), {1, -1, 1, 32}}); 
- auto unsqueeze_cos = makePattern({varsplit->output(1), {1, -1, 1, 32}}); + // Either a Reshape or an Unsqueeze should be supported here + auto unsqueeze_sin = makePattern({varsplit->output(0), {1, -1, 1, 32}}) | + makePattern({varsplit->output(0), 2}); + auto unsqueeze_cos = makePattern({varsplit->output(1), {1, -1, 1, 32}}) | + makePattern({varsplit->output(1), 2}); // repeat cos/sin table auto const_idx = makeConst(ov::element::i32, ov::PartialShape::dynamic(), [](const ov::op::v0::Constant& node) { const auto& vec = node.get_vector(); @@ -402,9 +405,6 @@ ov::intel_cpu::RoPEFusionGPTJ::RoPEFusionGPTJ() { auto repeat_interleave_sin = makePattern({unsqueeze_sin, const_idx, 3}, {{"batch_dims", 0}}); auto repeat_interleave_cos = makePattern({unsqueeze_cos, const_idx, 3}, {{"batch_dims", 0}}); - auto t_cos = makePattern(ov::Rank(4)); - auto t_sin = makePattern(ov::Rank(4)); - // x interleave (-x[:,:,:, 1::2], x[:,:,:, 0::2]) auto slice_Slice_1174 = GenSlice(slice_Slice_965, 1, int32_max, 2, 3); @@ -418,13 +418,16 @@ ov::intel_cpu::RoPEFusionGPTJ::RoPEFusionGPTJ() { auto ShapeOf_169068 = makePattern({stack_1182}); auto flatten_Slice_1194 = GenSlice(ShapeOf_169068, 0, 3, 1, 0); auto flatten_Concat_1197 = makePattern({flatten_Slice_1194, {-1}}, {{"axis", 0}}); + // With special_zero set, there is no need to use ShapeOf to get the full shape auto flatten_Reshape_1198 = makePattern({stack_1182, flatten_Concat_1197}); + auto flatten_Reshape_Zero = + makePattern({stack_1182, ov::pass::pattern::any_input()}, {{"special_zero", true}}); // x*cos [B,L,H,ndims] auto mul_cos = makePattern({slice_Slice_965, repeat_interleave_cos}, {{"auto_broadcast", "numpy"}}); auto mul_sin = - makePattern({flatten_Reshape_1198, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}}); + makePattern({flatten_Reshape_1198 | flatten_Reshape_Zero, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}}); // *cos + *sin auto rotary_emb = makePattern({mul_cos, mul_sin}, {{"auto_broadcast", "numpy"}}); @@ -460,15 +463,12 @@ ov::intel_cpu::RoPEFusionGPTJ::RoPEFusionGPTJ() { auto new_node = std::make_shared(new_args, config); new_node->set_friendly_name(old_node->get_friendly_name()); ov::copy_runtime_info({pattern_map.at(varsplit).get_node_shared_ptr(), - pattern_map.at(unsqueeze_sin).get_node_shared_ptr(), - pattern_map.at(unsqueeze_cos).get_node_shared_ptr(), pattern_map.at(repeat_interleave_sin).get_node_shared_ptr(), pattern_map.at(repeat_interleave_cos).get_node_shared_ptr(), pattern_map.at(neg_Multiply_1177).get_node_shared_ptr(), pattern_map.at(Unsqueeze_65524).get_node_shared_ptr(), pattern_map.at(Unsqueeze_65525).get_node_shared_ptr(), pattern_map.at(stack_1182).get_node_shared_ptr(), - pattern_map.at(flatten_Concat_1197).get_node_shared_ptr(), pattern_map.at(mul_cos).get_node_shared_ptr(), pattern_map.at(mul_sin).get_node_shared_ptr(), pattern_map.at(rotary_emb).get_node_shared_ptr(), @@ -476,6 +476,17 @@ ov::intel_cpu::RoPEFusionGPTJ::RoPEFusionGPTJ() { pattern_map.at(permute_Transpose_1213).get_node_shared_ptr()}, new_node); ov::replace_node(old_node, new_node); + // ShapeOf may be moved up from the Transpose to the Add; + // after RoPE fusion, ShapeOf must be moved to the data input of RoPE, otherwise an extra subgraph remains + std::shared_ptr rotary_emb_node = pattern_map.at(rotary_emb).get_node_shared_ptr(); + auto rotary_emb_out = rotary_emb_node->output(0); + if (rotary_emb_out.get_target_inputs().size() == 2) { + for (auto& input : rotary_emb_out.get_target_inputs()) { + if (ov::is_type(input.get_node())) { + 
input.replace_source_output(pattern_map.at(view_Reshape)); + } + } + } return true; }; diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index 7eec1229ab615b..3a58130da3463e 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -37,10 +37,12 @@ else() file(GLOB_RECURSE TMP_LIST_OF_TEST_CLASSES ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/classes/*.cpp) file(GLOB_RECURSE TMP_LIST_OF_COMMON_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/common/*.cpp) file(GLOB_RECURSE TMP_LIST_OF_ARM_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/arm/*.cpp) - file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/arm/*.cpp) - file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common/*.cpp) + file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/arm/*.cpp) + file(GLOB_RECURSE TMP_LIST_OF_COMMON_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common/*.cpp) + file(GLOB_RECURSE TMP_LIST_OF_SUBGRAPH_TEST_CLASSES ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/classes/*.*) + list(APPEND TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS - ${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS}) + ${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS} ${TMP_LIST_OF_COMMON_SUBGRAPH_TESTS} ${TMP_LIST_OF_SUBGRAPH_TEST_CLASSES}) set(TMP_EXPLICITLY_ENABLED_TESTS "${TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS}") endif() diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp index f0cdd344e17046..5fc89b979c261c 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/export_import.cpp @@ -104,18 +104,11 @@ const std::vector testing_property_for_performance_mode = { {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}}; -const std::vector testing_property_for_scheduling_core_type_1 = { +const std::vector testing_property_for_scheduling_core_type = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, - {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}}; - -const std::vector testing_property_for_scheduling_core_type_2 = { {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::PCORE_ONLY)}, {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; -const std::vector testing_property_for_scheduling_core_type_3 = { - {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)}, - {ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}}; - const std::vector testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)}, {ov::hint::enable_hyper_threading(false)}}; @@ -128,9 +121,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest, ::testing::Values(testing_property_for_streams, testing_property_for_threads, testing_property_for_performance_mode, - testing_property_for_scheduling_core_type_1, - 
testing_property_for_scheduling_core_type_2, - testing_property_for_scheduling_core_type_3, + testing_property_for_scheduling_core_type, testing_property_for_enable_hyper_threading, testing_property_for_enable_cpu_pinning))); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index d0ee8a889414cd..cef9e809bf2a62 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -33,6 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable RO_property(ov::hint::num_requests.name()), RO_property(ov::hint::enable_cpu_pinning.name()), RO_property(ov::hint::scheduling_core_type.name()), + RO_property(ov::hint::model_distribution_policy.name()), RO_property(ov::hint::enable_hyper_threading.name()), RO_property(ov::execution_devices.name()), RO_property(ov::intel_cpu::denormals_optimization.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 0d373252eddafd..1b29347d6c0605 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -47,6 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) { RW_property(ov::hint::num_requests.name()), RW_property(ov::hint::enable_cpu_pinning.name()), RW_property(ov::hint::scheduling_core_type.name()), + RW_property(ov::hint::model_distribution_policy.name()), RW_property(ov::hint::enable_hyper_threading.name()), RW_property(ov::device::id.name()), RW_property(ov::intel_cpu::denormals_optimization.name()), @@ -107,6 +108,22 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigInferenceNumThreads) { ASSERT_EQ(num_threads, value); } +TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigModelDistributionPolicy) { + ov::Core ie; + std::set value = {}; + std::set model_policy = {ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}; + + ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); + + model_policy = {}; + + ASSERT_NO_THROW(ie.set_property("CPU", ov::hint::model_distribution_policy(model_policy))); + ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::model_distribution_policy)); + ASSERT_EQ(model_policy, value); +} + TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) { ov::Core ie; int32_t value = 0; diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp index 7efe100d98de3a..e02f6422d8a050 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp @@ -34,6 +34,10 @@ std::string EltwiseLayerCPUTest::getTestCaseName(testing::TestParamInfo& targetIn inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input( funcInput.get_element_type(), targetInputStaticShapes[i], - (funcInput.get_element_type() == element::i32) || 
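The new model_distribution_policy tests above exercise a plain set/get round trip on the CPU plugin. A minimal usage sketch mirroring them, assuming an OpenVINO build that ships this hint:

```cpp
#include <openvino/openvino.hpp>
#include <set>

int main() {
    ov::Core core;
    // Request tensor-parallel execution, then read the hint back.
    core.set_property("CPU",
                      ov::hint::model_distribution_policy(
                          std::set<ov::hint::ModelDistributionPolicy>{
                              ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
    const auto policy = core.get_property("CPU", ov::hint::model_distribution_policy);
    return policy.count(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) ? 0 : 1;
}
```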
(funcInput.get_element_type() == element::u32))}); + (funcInput.get_element_type() == element::i32) || (funcInput.get_element_type() == element::u32) || + (funcInput.get_element_type() == element::i8) || (funcInput.get_element_type() == element::u8))}); } } @@ -199,7 +212,11 @@ void EltwiseLayerCPUTest::SetUp() { } } - auto data_tensor = generate_eltwise_input(netType, shape, (netType == element::i32) || (netType == element::u32)); + auto data_tensor = generate_eltwise_input( + netType, + shape, + (netType == element::i32) || (netType == element::u32) || + (netType == element::i8) || (netType == element::u8)); if ((netType == ElementType::i8) || (netType == ElementType::u8)) { auto data_ptr = reinterpret_cast(data_tensor.data()); std::vector data(data_ptr, data_ptr + ov::shape_size(shape)); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/concat.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/concat.cpp index 0d7eb6e3813712..ec040503c58062 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/concat.cpp @@ -105,6 +105,9 @@ const auto planar_5D = CPUSpecificParams{{ncdhw}, {ncdhw}, {}, "unknown"}; const auto planarChannels_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "ref"}; const auto planarChannels_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "ref"}; +const auto planarChannels_inplace_4D = CPUSpecificParams{{nhwc}, {nhwc}, {}, "unknown"}; +const auto planarChannels_inplace_5D = CPUSpecificParams{{ndhwc}, {ndhwc}, {}, "unknown"}; + const auto blocked8_4D = CPUSpecificParams{{nChw8c}, {nChw8c}, {}, "unknown"}; const auto blocked8_5D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {}, "unknown"}; @@ -810,6 +813,32 @@ INSTANTIATE_TEST_SUITE_P(smoke_Concat_inPlace, ::testing::Values(CPUSpecificParams{{}, {}, {}, "unknown"})), ConcatLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Concat_CPU_planarChannels_inplace_4D_static, + ConcatLayerCPUTest, + ::testing::Combine(::testing::Values(1), + ::testing::Values(static_shapes_to_test_representation({{1, 32, 1, 1}, + {1, 32, 1, 1}})), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(planarChannels_inplace_4D)), + ConcatLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Concat_CPU_planarChannels_inplace_4D_sp_w_static, + ConcatLayerCPUTest, + ::testing::Combine(::testing::Values(2), + ::testing::Values(static_shapes_to_test_representation({{1, 1, 32, 32}, + {1, 1, 32, 32}})), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(planarChannels_inplace_4D)), + ConcatLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Concat_CPU_planarChannels_inplace_5D_static, + ConcatLayerCPUTest, + ::testing::Combine(::testing::Values(1), + ::testing::Values(static_shapes_to_test_representation({{1, 32, 1, 1, 1}, + {1, 32, 1, 1, 1}})), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(planarChannels_inplace_5D)), + ConcatLayerCPUTest::getTestCaseName); } // namespace } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/eltwise.cpp index c2503e601216b9..875a5b143dbecf 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/eltwise.cpp @@ -133,7 +133,7 @@ const auto 
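The i8/u8 flag now passed to generate_eltwise_input exists so integer eltwise tests get small, non-zero operands: divides stay defined and products stay inside the 8-bit range. A hypothetical generator illustrating that constraint; it stands in for the rationale, not for the shared test utility itself:

```cpp
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

// Small positive operands keep i8/u8 divide and multiply exact and in range.
std::vector<int8_t> safe_i8_inputs(size_t n, uint32_t seed = 1) {
    std::mt19937 gen(seed);
    std::uniform_int_distribution<int> dist(1, 10); // no zeros, no overflow
    std::vector<int8_t> out(n);
    for (auto& v : out)
        v = static_cast<int8_t>(dist(gen));
    return out;
}
```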
params_4D_int_jit = ::testing::Combine( ::testing::ValuesIn({ utils::EltwiseTypes::ADD, utils::EltwiseTypes::MULTIPLY }), ::testing::ValuesIn(secondaryInputTypes()), ::testing::ValuesIn(opTypes()), - ::testing::ValuesIn({ ElementType::i32, ElementType::f32 }), + ::testing::ValuesIn({ ElementType::i8, ElementType::u8, ElementType::f16, ElementType::i32, ElementType::f32 }), ::testing::Values(ov::element::undefined), ::testing::Values(ov::element::undefined), ::testing::Values(ov::test::utils::DEVICE_CPU), diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/eltwise_chain.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/eltwise_chain.cpp new file mode 100644 index 00000000000000..32c07193bedf99 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/eltwise_chain.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "custom/subgraph_tests/src/classes/eltwise_chain.hpp" + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/node_builders/constant.hpp" +#include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +using namespace CPUTestUtils; + +namespace ov { +namespace test { +using namespace ov::test::utils; +using namespace ov::test::eltwise_chain; + +namespace { + +std::vector> eltwiseOpsConvertInt8 = { + { EltwiseTypes::MULTIPLY }, + { EltwiseTypes::ADD }, + { EltwiseTypes::DIVIDE } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_MergeConvert_int8, EltwiseChainTest, + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert())), + ::testing::Values(InputLayerType::CONSTANT), + ::testing::ValuesIn(inputPrecisionsConvert()), + ::testing::ValuesIn(eltwiseOpsConvertInt8), + ::testing::Values(false), + ::testing::ValuesIn({ov::element::i8, ov::element::u8}), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + EltwiseChainTest::getTestCaseName); + +} // namespace +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/eltwise_chain.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/eltwise_chain.cpp new file mode 100644 index 00000000000000..1dbdbab49e4adb --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/eltwise_chain.cpp @@ -0,0 +1,225 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "eltwise_chain.hpp" + +#include +#include +#include +#include + +using namespace CPUTestUtils; + +namespace ov { +namespace test { +using namespace ov::test::utils; + +std::string EltwiseChainTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector inputShapes; + InputLayerType secondaryInputType; + std::vector inputPrecisions; + std::vector eltwiseOpTypes; + bool withQuantization; + ov::element::Type conversion; + std::string targetName; + std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, conversion, targetName) = obj.param; + std::ostringstream results; + + results << "IS=("; + for (const auto& shape : inputShapes) { + results << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + results << ")_TS=("; + for (const auto& shape : inputShapes) { + for (const auto& item : shape.second) { + results << ov::test::utils::vec2str(item) << "_"; + } + } + for 
(size_t i = 0; i < inputPrecisions.size(); i++) { + results << "InPRC" << std::to_string(i) << "=" << inputPrecisions[i] << "_"; + } + for (size_t i = 0; i < eltwiseOpTypes.size(); i++) { + results << "Op" << std::to_string(i) << "=" << eltwiseOpTypes[i] << "_"; + } + results << "secondaryInputType=" << secondaryInputType << "_"; + results << "WithQuant=" << withQuantization << "_"; + if (conversion != ov::element::undefined) { + results << "Conversion=" << conversion << "_"; + } + results << "targetDevice=" << targetName; + + return results.str(); +} + +ov::Tensor EltwiseChainTest::generate_eltwise_input(const ov::element::Type& type, const ov::Shape& shape) { + struct gen_params { + uint32_t range; + int32_t start_from; + int32_t resolution; + + gen_params(uint32_t range = 10, int32_t start_from = 0, int32_t resolution = 1) + : range(range), start_from(start_from), resolution(resolution) {} + }; + + gen_params params = type.is_real() ? gen_params(10, 1) : gen_params(10, 10); + + ov::test::utils::InputGenerateData in_data; + in_data.start_from = params.start_from; + in_data.range = params.range; + in_data.resolution = params.resolution; + auto tensor = ov::test::utils::create_and_fill_tensor(type, shape, in_data); + return tensor; +} + +void EltwiseChainTest::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input( + funcInput.get_element_type(), + targetInputStaticShapes[i])}); + } +} + +void EltwiseChainTest::SetUp() { + abs_threshold = 0.1f; + + std::vector inputShapes; + InputLayerType secondaryInputType; + std::vector inputPrecisions; + std::vector eltwiseOpTypes; + bool withQuantization; + ov::element::Type conversion; + std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, conversion, targetDevice) = this->GetParam(); + + init_input_shapes(inputShapes); + + ov::ParameterVector paramVec; + std::vector> inputNodes1; + std::vector> inputNodes2; + if (secondaryInputType == utils::InputLayerType::PARAMETER) { + for (size_t i = 0; i < inputDynamicShapes.size(); i++) { + const auto param = std::make_shared(inputPrecisions[i], inputDynamicShapes[i]); + paramVec.push_back(param); + + const auto inputNode = (conversion == ov::element::undefined) ? + param : + std::dynamic_pointer_cast(std::make_shared(param, conversion)); + if (inputNodes1.empty()) { + inputNodes1.push_back(inputNode); + } + inputNodes2.push_back(inputNode); + } + } else { + paramVec = ov::ParameterVector {std::make_shared(inputPrecisions[0], inputDynamicShapes.front())}; + inputNodes1.push_back( + conversion == ov::element::undefined ? + paramVec.front() : + std::dynamic_pointer_cast(std::make_shared(paramVec.front(), conversion))); + + for (size_t i = 1; i < inputPrecisions.size(); i++) { + std::vector input1Data(ov::shape_size(targetStaticShapes[0][i])); + inputNodes2.push_back(ov::test::utils::deprecated::make_constant( + conversion == ov::element::undefined ? 
static_cast(inputPrecisions[i]) : conversion, + targetStaticShapes[0][i], + input1Data, + true)); + } + } + + if (withQuantization) { + std::vector> eltwiseOps; + eltwiseOps.push_back(make_eltwise(inputNodes1[0], inputNodes2[0], eltwiseOpTypes[0])); + for (size_t i = 1; i < eltwiseOpTypes.size() - 1; i++) { + eltwiseOps.push_back(make_eltwise(eltwiseOps[eltwiseOps.size() - 1], inputNodes2[i], eltwiseOpTypes[i])); + } + + std::vector constShape(targetStaticShapes[0][0].size(), 1); + constShape[1] = targetStaticShapes[0][0][1]; + auto fq = ov::test::utils::make_fake_quantize(eltwiseOps[eltwiseOps.size() - 1], + ov::element::Type(ov::element::f32), + 256, + constShape); + + eltwiseOps.push_back(make_eltwise(fq, inputNodes2[eltwiseOpTypes.size() - 1], eltwiseOpTypes[eltwiseOpTypes.size() - 1])); + + ov::ResultVector results{std::make_shared(eltwiseOps[eltwiseOps.size() - 1])}; + function = std::make_shared(results, paramVec, "eltwise_chain_fq"); + } else { + std::vector> eltwiseOps; + eltwiseOps.push_back(make_eltwise(inputNodes1[0], inputNodes2[0], eltwiseOpTypes[0])); + for (size_t i = 1; i < eltwiseOpTypes.size(); i++) { + eltwiseOps.push_back(make_eltwise(eltwiseOps[eltwiseOps.size() - 1], inputNodes2[i], eltwiseOpTypes[i])); + } + + ov::ResultVector results{std::make_shared(eltwiseOps[eltwiseOps.size() - 1])}; + function = std::make_shared(results, paramVec, "eltwise_chain"); + } +} + +TEST_P(EltwiseChainTest, CompareWithRefs) { + run(); +} + +namespace eltwise_chain { +std::vector> inputShapes() { + return { + {{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}}, + {{1, 48, 5, 6}, {1, 48, 1, 1}, {1, 48, 5, 6}, {1, 1, 5, 6}}, + {{1, 72, 28, 28}, {1, 72, 1, 1}, {1, 72, 1, 1}, {1, 72, 1, 1}}, + {{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}}, + {{1, 2, 3}, {3}, {3}, {3}}, + {{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}}, + {{3, 12, 5, 5}, {1, 12, 5, 1}, {3, 1, 1, 1}, {3, 12, 5, 5}}, + {{1, 1, 1, 1}, {1, 12, 5, 1}, {3, 12, 1, 5}, {3, 12, 5, 1}}, + {{1, 1, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}} + }; +} + +std::vector> inputPrecisions() { + return { + { ElementType::f32, ElementType::f32, ElementType::f32, ElementType::f32 }, + { ElementType::i32, ElementType::i32, ElementType::i32, ElementType::i32 } + }; +} + +std::vector> eltwiseOps() { + return { + {EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT}, + {EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD} + }; +} + +std::vector> inputShapesConvert() { + return { + {{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}} + }; +} + +std::vector> eltwiseOpsConvert() { + return { + {EltwiseTypes::MULTIPLY}, + {EltwiseTypes::ADD}, + {EltwiseTypes::DIVIDE}, + {EltwiseTypes::SUBTRACT}, + {EltwiseTypes::POWER}, + }; +} + +std::vector> inputPrecisionsConvert() { + return { + {ElementType::i8, ElementType::f32, ElementType::f32}, + {ElementType::u8, ElementType::f32, ElementType::f32}, + {ElementType::i16, ElementType::f32, ElementType::f32}, + {ElementType::u16, ElementType::f32, ElementType::f32}, + {ElementType::i32, ElementType::f32, ElementType::f32}, + {ElementType::f16, ElementType::f32, ElementType::f32}, + {ElementType::f32, ElementType::f32, ElementType::f32}, + }; +} +} // namespace eltwise_chain + +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/eltwise_chain.hpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/eltwise_chain.hpp new file mode 100644 index 
00000000000000..17954b438abd73 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/eltwise_chain.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include "common_test_utils/node_builders/constant.hpp" +#include "common_test_utils/node_builders/fake_quantize.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/node_builders/eltwise.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "utils/cpu_test_utils.hpp" + +namespace ov { +namespace test { + +typedef std::tuple<std::vector<InputShape>, // Input shapes + ov::test::utils::InputLayerType, // Secondary input type + std::vector<ElementType>, // Input precisions + std::vector<ov::test::utils::EltwiseTypes>, // Eltwise operations + bool, // With quantization + ov::element::Type, // Conversion type + std::string // Device name + > +EltwiseChainTuple; + +class EltwiseChainTest : public testing::WithParamInterface<EltwiseChainTuple>, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo<EltwiseChainTuple> &obj); + ov::Tensor generate_eltwise_input(const ov::element::Type& type, const ov::Shape& shape); + void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override; + +protected: + void SetUp() override; +}; + +namespace eltwise_chain { +std::vector<std::vector<ov::Shape>> inputShapes(); +std::vector<std::vector<ElementType>> inputPrecisions(); +std::vector<std::vector<ov::test::utils::EltwiseTypes>> eltwiseOps(); +std::vector<std::vector<ov::Shape>> inputShapesConvert(); +std::vector<std::vector<ov::test::utils::EltwiseTypes>> eltwiseOpsConvert(); +std::vector<std::vector<ElementType>> inputPrecisionsConvert(); +} // namespace eltwise_chain +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/eltwise_chain.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/eltwise_chain.cpp index 76bce3abff6363..7c78526928d8cc 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/eltwise_chain.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/eltwise_chain.cpp @@ -5,233 +5,40 @@ #include #include #include -#include + +#include "custom/subgraph_tests/src/classes/eltwise_chain.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "common_test_utils/node_builders/constant.hpp" -#include "common_test_utils/node_builders/fake_quantize.hpp" -#include "common_test_utils/common_utils.hpp" -#include "common_test_utils/ov_tensor_utils.hpp" -#include "utils/cpu_test_utils.hpp" #include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" using namespace CPUTestUtils; namespace ov { namespace test { using namespace ov::test::utils; - -typedef std::tuple<std::vector<InputShape>, // Input shapes - InputLayerType, // Secondary input type - std::vector<ElementType>, // Input precisions - std::vector<EltwiseTypes>, // Eltwise operations - bool, // With quantization - ov::element::Type, // Conversion type - std::string // Device name - > - EltwiseChainTuple; - -class EltwiseChainTest : public testing::WithParamInterface<EltwiseChainTuple>, - virtual public SubgraphBaseTest { -public: - static std::string getTestCaseName(const testing::TestParamInfo<EltwiseChainTuple> &obj) { - std::vector<InputShape> inputShapes; - InputLayerType secondaryInputType; - std::vector<ElementType> inputPrecisions; - std::vector<EltwiseTypes> eltwiseOpTypes; - bool withQuantization; - ov::element::Type conversion; - std::string targetName; - std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, conversion, targetName) = obj.param; - std::ostringstream
results; - - results << "IS=("; - for (const auto& shape : inputShapes) { - results << ov::test::utils::partialShape2str({shape.first}) << "_"; - } - results << ")_TS=("; - for (const auto& shape : inputShapes) { - for (const auto& item : shape.second) { - results << ov::test::utils::vec2str(item) << "_"; - } - } - for (size_t i = 0; i < inputPrecisions.size(); i++) { - results << "InPRC" << std::to_string(i) << "=" << inputPrecisions[i] << "_"; - } - for (size_t i = 0; i < eltwiseOpTypes.size(); i++) { - results << "Op" << std::to_string(i) << "=" << eltwiseOpTypes[i] << "_"; - } - results << "secondaryInputType=" << secondaryInputType << "_"; - results << "WithQuant=" << withQuantization << "_"; - if (conversion == ov::element::undefined) { - results << "Conversion" << conversion << "_"; - } - results << "targetDevice=" << targetName; - - return results.str(); - } - - void generate_inputs(const std::vector& targetInputStaticShapes) override { - inputs.clear(); - const auto& funcInputs = function->inputs(); - for (size_t i = 0; i < funcInputs.size(); ++i) { - const auto& funcInput = funcInputs[i]; - ov::Tensor tensor; - ov::test::utils::InputGenerateData in_data; - in_data.start_from = 1; - in_data.range = 10; - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], in_data); - inputs.insert({funcInput.get_node_shared_ptr(), tensor}); - } - } - -protected: - void SetUp() override { - abs_threshold = 0.1f; - - std::vector inputShapes; - InputLayerType secondaryInputType; - std::vector inputPrecisions; - std::vector eltwiseOpTypes; - bool withQuantization; - ov::element::Type conversion; - std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, conversion, targetDevice) = this->GetParam(); - - init_input_shapes(inputShapes); - - ov::ParameterVector paramVec; - std::vector> inputNodes1; - std::vector> inputNodes2; - if (secondaryInputType == utils::InputLayerType::PARAMETER) { - for (size_t i = 0; i < inputDynamicShapes.size(); i++) { - const auto param = std::make_shared(inputPrecisions[i], inputDynamicShapes[i]); - paramVec.push_back(param); - - const auto inputNode = (conversion == ov::element::undefined) ? - param : - std::dynamic_pointer_cast(std::make_shared(param, conversion)); - if (inputNodes1.empty()) { - inputNodes1.push_back(inputNode); - } - inputNodes2.push_back(inputNode); - } - } else { - paramVec = ov::ParameterVector {std::make_shared(inputPrecisions[0], inputDynamicShapes.front())}; - inputNodes1.push_back( - conversion == ov::element::undefined ? - paramVec.front() : - std::dynamic_pointer_cast(std::make_shared(paramVec.front(), conversion))); - - for (size_t i = 1; i < inputPrecisions.size(); i++) { - std::vector input1Data(ov::shape_size(targetStaticShapes[0][i])); - inputNodes2.push_back(ov::test::utils::deprecated::make_constant( - conversion == ov::element::undefined ? 
static_cast(inputPrecisions[i]) : conversion, - targetStaticShapes[0][i], - input1Data, - true)); - } - } - - if (withQuantization) { - std::vector> eltwiseOps; - eltwiseOps.push_back(make_eltwise(inputNodes1[0], inputNodes2[0], eltwiseOpTypes[0])); - for (size_t i = 1; i < eltwiseOpTypes.size() - 1; i++) { - eltwiseOps.push_back(make_eltwise(eltwiseOps[eltwiseOps.size() - 1], inputNodes2[i], eltwiseOpTypes[i])); - } - - std::vector constShape(targetStaticShapes[0][0].size(), 1); - constShape[1] = targetStaticShapes[0][0][1]; - auto fq = ov::test::utils::make_fake_quantize(eltwiseOps[eltwiseOps.size() - 1], - ov::element::Type(ov::element::f32), - 256, - constShape); - - eltwiseOps.push_back(make_eltwise(fq, inputNodes2[eltwiseOpTypes.size() - 1], eltwiseOpTypes[eltwiseOpTypes.size() - 1])); - - ov::ResultVector results{std::make_shared(eltwiseOps[eltwiseOps.size() - 1])}; - function = std::make_shared(results, paramVec, "eltwise_chain_fq"); - } else { - std::vector> eltwiseOps; - eltwiseOps.push_back(make_eltwise(inputNodes1[0], inputNodes2[0], eltwiseOpTypes[0])); - for (size_t i = 1; i < eltwiseOpTypes.size(); i++) { - eltwiseOps.push_back(make_eltwise(eltwiseOps[eltwiseOps.size() - 1], inputNodes2[i], eltwiseOpTypes[i])); - } - - ov::ResultVector results{std::make_shared(eltwiseOps[eltwiseOps.size() - 1])}; - function = std::make_shared(results, paramVec, "eltwise_chain"); - } - } -}; - -TEST_P(EltwiseChainTest, CompareWithRefs) { - run(); -} +using namespace ov::test::eltwise_chain; namespace { -std::vector> inputShapes = { - {{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}}, - {{1, 48, 5, 6}, {1, 48, 1, 1}, {1, 48, 5, 6}, {1, 1, 5, 6}}, - {{1, 72, 28, 28}, {1, 72, 1, 1}, {1, 72, 1, 1}, {1, 72, 1, 1}}, - {{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}}, - {{1, 2, 3}, {3}, {3}, {3}}, - {{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}}, - {{3, 12, 5, 5}, {1, 12, 5, 1}, {3, 1, 1, 1}, {3, 12, 5, 5}}, - {{1, 1, 1, 1}, {1, 12, 5, 1}, {3, 12, 1, 5}, {3, 12, 5, 1}}, - {{1, 1, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}} -}; - -std::vector> inputPrecisions = { - { ElementType::f32, ElementType::f32, ElementType::f32, ElementType::f32 }, - { ElementType::i32, ElementType::i32, ElementType::i32, ElementType::i32 } -}; - -std::vector> eltwiseOps = { - { EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT }, - { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD } -}; - INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest, ::testing::Combine( - ::testing::ValuesIn(static_shapes_to_test_representation(inputShapes)), + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapes())), ::testing::Values(InputLayerType::CONSTANT), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(eltwiseOps), + ::testing::ValuesIn(inputPrecisions()), + ::testing::ValuesIn(eltwiseOps()), ::testing::Values(false), ::testing::Values(ov::element::undefined), ::testing::Values(ov::test::utils::DEVICE_CPU)), EltwiseChainTest::getTestCaseName); - - std::vector> inputShapesConvert = { - {{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}} - }; - - std::vector> inputPrecisionsConvert = { - { ElementType::i8, ElementType::f32, ElementType::f32 }, - { ElementType::u8, ElementType::f32, ElementType::f32 }, - { ElementType::i16, ElementType::f32, ElementType::f32 }, - { ElementType::u16, ElementType::f32, ElementType::f32 }, - { ElementType::i32, ElementType::f32, ElementType::f32 }, - // { ElementType::u32, ElementType::f32, ElementType::f32 }, // 
plugin doesn't support - { ElementType::f16, ElementType::f32, ElementType::f32 }, - { ElementType::f32, ElementType::f32, ElementType::f32 }, - }; - - std::vector<std::vector<EltwiseTypes>> eltwiseOpsConvert = { - { EltwiseTypes::MULTIPLY }, - { EltwiseTypes::ADD }, - { EltwiseTypes::DIVIDE }, - { EltwiseTypes::SUBTRACT }, - { EltwiseTypes::POWER }, - }; - INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_MergeConvert, EltwiseChainTest, ::testing::Combine( - ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert)), + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert())), ::testing::Values(InputLayerType::CONSTANT), - ::testing::ValuesIn(inputPrecisionsConvert), - ::testing::ValuesIn(eltwiseOpsConvert), + ::testing::ValuesIn(inputPrecisionsConvert()), + ::testing::ValuesIn(eltwiseOpsConvert()), ::testing::Values(false), ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -263,7 +70,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChainWithFQ, EltwiseChainTest, ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesFQ)), ::testing::Values(InputLayerType::CONSTANT), ::testing::ValuesIn(inputPrecisionsFQ), - ::testing::ValuesIn(eltwiseOps), + ::testing::ValuesIn(eltwiseOps()), ::testing::Values(true), ::testing::Values(ov::element::undefined), ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -521,8 +328,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_dyn, EltwiseChainTest, ::testing::Combine( ::testing::ValuesIn(inputShapes_dyn), ::testing::Values(InputLayerType::PARAMETER), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(eltwiseOps), + ::testing::ValuesIn(inputPrecisions()), + ::testing::ValuesIn(eltwiseOps()), ::testing::Values(false), ::testing::Values(ov::element::undefined), ::testing::Values(ov::test::utils::DEVICE_CPU)), diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp index 3fdaadc8d4362e..d0bf420278e412 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp @@ -457,5 +457,148 @@ TEST_F(RoPECPUTestQwen7b, smoke_CompareWithRefs) { CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); } +class RoPECPUTestGPTJ : public SubgraphBaseTest, public testing::WithParamInterface<bool> { +public: + static std::string getTestCaseName(const testing::TestParamInfo<bool>& obj) { + bool hasShapeOf = obj.param; + std::ostringstream result; + result << "hasShapeOf=" << hasShapeOf; + return result.str(); + } + void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override { + const auto& funcInputs = function->inputs(); + + auto& input_shape = targetInputStaticShapes[0]; + auto& sincos_shape = targetInputStaticShapes[1]; + ov::Tensor t_input = + utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); + ov::Tensor t_cos_sin_cache = + utils::create_and_fill_tensor(funcInputs[1].get_element_type(), sincos_shape, 2, -1.0f, 32768); + + inputs.clear(); + inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); + inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache}); + } + +protected: + std::shared_ptr<ov::Model> buildROPE_GPTJ(const int num_head, + const int hidden_dims, + const int rotary_dims, + bool hasShapeOf) { + auto int32_max = std::numeric_limits<int32_t>::max(); + auto input = +
std::make_shared(ov::element::f32, PartialShape{-1, -1, num_head, hidden_dims}); + auto sincos = std::make_shared(ov::element::f32, PartialShape{-1, -1, rotary_dims}); + + auto slice_Slice_965 = + makeOP({input, {0, 0, 0, 0}, {0, 0, 0, rotary_dims}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + slice_Slice_965->set_friendly_name("slice_Slice_965"); + + auto varsplit = makeOP({sincos, -1, {rotary_dims / 2, -1}}); + varsplit->set_output_size(2); + varsplit->set_friendly_name("varsplit"); + auto unsqueeze_sin = makeOP({varsplit->output(0), 2}); + auto unsqueeze_cos = makeOP({varsplit->output(1), 2}); + std::vector gather_idx(rotary_dims, 1); + int32_t v = 0; + for (size_t i = 0; i < gather_idx.size(); i += 2, v++) { + gather_idx[i] = v; + gather_idx[i + 1] = v; + } + + auto const_idx = makeConst(ov::element::i32, ov::Shape({static_cast(rotary_dims)}), gather_idx); + auto constant_155588 = makeConst(element::f32, + ov::Shape({ + 1, + 1, + 1, + 1, + }), + {-1.000000f}); + auto repeat_interleave_sin = makeOP({unsqueeze_sin, const_idx, 3}, {{"batch_dims", 0}}); + auto repeat_interleave_cos = makeOP({unsqueeze_cos, const_idx, 3}, {{"batch_dims", 0}}); + repeat_interleave_sin->set_friendly_name("repeat_interleave_sin"); + repeat_interleave_cos->set_friendly_name("repeat_interleave_cos"); + // x interleave (-x[:,:,:, 1::2], x[:,:,:, 0::2]) + auto slice_Slice_1174 = + makeOP({slice_Slice_965, {0, 0, 0, 1}, {0, 0, 0, int32_max}, {1, 1, 1, 2}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto neg_Multiply_1177 = + makeOP({slice_Slice_1174, constant_155588}, {{"auto_broadcast", "numpy"}}); + auto Unsqueeze_65524 = makeOP({neg_Multiply_1177, -1}); + + auto slice_Slice_1168 = + makeOP({slice_Slice_965, {0, 0, 0, 0}, {0, 0, 0, int32_max}, {1, 1, 1, 2}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto Unsqueeze_65525 = makeOP({slice_Slice_1168, -1}); + auto stack_1182 = makeOP({Unsqueeze_65524, Unsqueeze_65525}, {{"axis", -1}}); + auto flatten_Reshape_1198 = + makeOP({stack_1182, {0, 0, num_head, rotary_dims}}, {{"special_zero", true}}); + // x*cos [B,L,H,ndims] + auto mul_cos = + makeOP({slice_Slice_965, repeat_interleave_cos}, {{"auto_broadcast", "numpy"}}); + mul_cos->set_friendly_name("mul_cos"); + auto mul_sin = + makeOP({flatten_Reshape_1198, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}}); + // *cos + *sin + auto rotary_emb = makeOP({mul_cos, mul_sin}, {{"auto_broadcast", "numpy"}}); + + auto slice_Slice_971 = + makeOP({input, {0, 0, 0, rotary_dims}, {0, 0, 0, int32_max}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto cat_Concat_1211 = makeOP({rotary_emb, slice_Slice_971}, {{"axis", -1}}); + auto permute_Transpose_1213 = makeOP({cat_Concat_1211, {0, 2, 1, 3}}); + ov::NodeVector model_output = {permute_Transpose_1213}; + if (hasShapeOf) { + auto shapeOf = makeOP({rotary_emb}, {{"output_type", "i32"}}); + auto gather = makeOP({shapeOf, {1}, 0}, {{"batch_dims", 0}}); + model_output.push_back(gather); + } + return std::make_shared(model_output, ov::ParameterVector{input, sincos}); + } + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + bool 
hasShapeOf = this->GetParam(); + const int batch = 2; + const int seq_length = 7; + const int num_head = 16; + const int hidden_dims = 256; + const int rotary_dims = 64; + + InputShape input = {{batch, seq_length, num_head, hidden_dims}, {{batch, seq_length, num_head, hidden_dims}}}; + InputShape sincos = {{batch, seq_length, rotary_dims}, {{batch, seq_length, rotary_dims}}}; + init_input_shapes({input, sincos}); + function = buildROPE_GPTJ(num_head, hidden_dims, rotary_dims, hasShapeOf); + } +}; + +TEST_P(RoPECPUTestGPTJ, smoke_CompareWithRefs) { + run(); + CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); +} + +INSTANTIATE_TEST_SUITE_P(smoke_RoPECPUTestGPTJ, + RoPECPUTestGPTJ, + ::testing::Values(true, false), + RoPECPUTestGPTJ::getTestCaseName); + } // namespace test } // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 8280bdfe251783..2e1c1e1941a9e3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -323,6 +323,12 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*fma.*EltwiseLayerCPUTest.*)"); retVector.emplace_back(R"(.*int_jit.*EltwiseLayerCPUTest.*)"); retVector.emplace_back(R"(.*dyn.*EltwiseChainTest.*)"); + + retVector.emplace_back(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=i8.*Conversion=i8.*)"); + retVector.emplace_back(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=u8.*Conversion=i8.*)"); + retVector.emplace_back(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=i16.*Conversion=i8.*)"); + retVector.emplace_back(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=u16.*Conversion=i8.*)"); + retVector.emplace_back(R"(.*smoke_EltwiseChain_MergeConvert_int8/.*InPRC0=i32.*Conversion=i8.*)"); #endif #if !defined(OPENVINO_ARCH_X86_64) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index a8f8b34b1c5a7f..15450eb6b842bb 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -102,7 +102,7 @@ class debug_configuration { int verbose_color; // Print verbose color int list_layers; // Print list layers int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive - int print_input_data_shapes; // Print the input data_shape for benchmark_app. + int print_input_data_shapes; // Print the input data_shape for benchmark_app. 
int disable_usm; // Disable usm usage int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) int disable_onednn_opt_post_ops; // Disable onednn optimize post operators diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index f5b6ec2221addb..3f0a69916f9ef2 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -42,7 +42,17 @@ void compile_graph::run(program& p) { auto& node = *(std::next(proc_order.begin(), idx)); const bool use_shape_agnostic_impl = !p.get_config().get_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape); const impl_types original_impl_type = node->get_preferred_impl_type(); - const bool change_initial_impl = node->is_dynamic() && original_impl_type == impl_types::onednn; + bool change_initial_impl = node->is_dynamic() && original_impl_type == impl_types::onednn; + + if (node->is_type() && change_initial_impl) { + const auto fc_prim = node->as().get_primitive(); + const auto weights_dt = node->get_input_layout(1).data_type; + + // Do not change impl (i.e. do not use ocl shape-agnostic kernels) in case of FC and 8bit compressed weights, + // since oneDNN primitives/kernels caching mechanism will be used instead. + if (fc_prim->compressed_weights && ov::element::Type(weights_dt).bitwidth() == 8) + change_initial_impl = false; + } if (change_initial_impl) node->set_preferred_impl_type(impl_types::ocl); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp index 443771d40dcc22..2ed48b659d3c38 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp @@ -33,6 +33,12 @@ struct gemm_impl : multi_stage_primitive { return make_unique(*this); } + gemm_impl() = default; + + gemm_impl(const std::vector& kd) : parent(kd) { + this->can_reuse_memory = true; + } + void load(BinaryInputBuffer& ib) override { parent::load(ib); if (is_dynamic()) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/multi_stage_primitive.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/multi_stage_primitive.hpp index 658cdc88d18618..b035e6b900e92c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/multi_stage_primitive.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/multi_stage_primitive.hpp @@ -51,7 +51,7 @@ struct multi_stage_primitive : public typed_primitive_impl { for (size_t k = 0; k < other._kernels.size(); ++k) { _kernels.emplace_back(other._kernels[k]->clone()); } - this->can_reuse_memory = false; + this->can_reuse_memory = other.can_reuse_memory; this->_kernel_name = other._kernel_name; this->_is_dynamic = other._is_dynamic; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp index 22753be0a71a43..c8f0a5cbae10c0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp @@ -34,6 +34,15 @@ struct count_nonzero_impl : typed_primitive_impl_ocl { } } + event::ptr execute_impl(const std::vector& events, count_nonzero_inst& instance) override { + if (instance.get_impl_params()->input_layouts[0].count() == 0) { + // set count of non-zero elements to 0 in case if input tensor is empty to have correct memory alloc for gather_nonzero + return 
instance.output_memory(0).fill(instance.get_network().get_stream(), 0); + } else { + return parent::execute_impl(events, instance); + } + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { return get_default_params(impl_param, is_shape_agnostic); } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 82d785ab943029..e9a18c00253059 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -50,6 +50,26 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { args.insert({DNNL_ARG_BIAS, bias->get_onednn_memory(_pd.weights_desc(1), offset)}); } + const auto& prim = instance.get_impl_params()->typed_desc(); + if (prim->compressed_weights) { + const auto weights_dt = instance.get_input_layout(1).data_type; + OPENVINO_ASSERT(ov::element::Type(weights_dt).bitwidth() == 8, "[GPU] oneDNN supports only 8bit compressed weights"); + + if (!prim->decompression_scale.empty()) { + auto decompression_scale_idx = prim->bias.empty() ? 2 : 3; + auto scale_mem = instance.dep_memory_ptr(decompression_scale_idx); + dnnl::memory::desc desc = onednn::layout_to_memory_desc(scale_mem->get_layout(), dnnl::memory::format_tag::a, true); + args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, scale_mem->get_onednn_memory(desc)}); + } + + if (!prim->decompression_zero_point.empty()) { + auto decompression_zp_idx = prim->bias.empty() ? 3 : 4; + auto zp_mem = instance.dep_memory_ptr(decompression_zp_idx); + dnnl::memory::desc desc = onednn::layout_to_memory_desc(zp_mem->get_layout(), dnnl::memory::format_tag::a, true); + args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, zp_mem->get_onednn_memory(desc)}); + } + } + return args; } @@ -91,13 +111,7 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { false); } - static std::shared_ptr get_fully_connected_primitive_descriptor(const kernel_impl_params& impl_params, - cldnn::engine& engine, size_t prim_input_size, bool has_bias, - const dnnl::primitive_attr& attr = dnnl::primitive_attr()) { - auto input_layout = impl_params.get_input_layout(0); - auto weights_layout = impl_params.get_input_layout(1); - auto output_layout = impl_params.get_output_layout(); - + static void transform_layouts(layout& input_layout, layout& weights_layout, layout& output_layout, size_t prim_input_size) { auto input_pshape = input_layout.get_partial_shape(); auto weights_pshape = weights_layout.get_partial_shape(); @@ -108,7 +122,7 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { } if (input_size > 3) { - input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature)); + input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature)); } if (weights_pshape.size() != 2) { weights_layout.set_partial_shape(reshape_to_2d(weights_pshape, feature)); @@ -123,6 +137,19 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { combine_bf_with_first_spatial_dim(input_layout); combine_bf_with_first_spatial_dim(output_layout); } + } + + static std::shared_ptr + get_inner_product_primitive_descriptor(const kernel_impl_params& impl_params, + cldnn::engine& engine, + size_t prim_input_size, + bool has_bias, + const dnnl::primitive_attr& attr = dnnl::primitive_attr()) { + auto input_layout = impl_params.get_input_layout(0); + auto weights_layout = 
impl_params.get_input_layout(1); + auto output_layout = impl_params.get_output_layout(); + + transform_layouts(input_layout, weights_layout, output_layout, prim_input_size); auto input_md = onednn::layout_to_memory_desc(input_layout, dnnl::memory::format_tag::undef, false); auto weights_md = onednn::layout_to_memory_desc(weights_layout, dnnl::memory::format_tag::any); @@ -149,6 +176,41 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { } } + static std::shared_ptr + get_matmul_primitive_descriptor(const kernel_impl_params& impl_params, + cldnn::engine& engine, + size_t prim_input_size, + bool has_bias, + const dnnl::primitive_attr& attr = dnnl::primitive_attr()) { + auto input_layout = impl_params.get_input_layout(0); + auto weights_layout = impl_params.get_input_layout(1); + auto output_layout = impl_params.get_output_layout(); + + transform_layouts(input_layout, weights_layout, output_layout, prim_input_size); + + auto input_md = onednn::layout_to_memory_desc(input_layout, dnnl::memory::format_tag::ab, false); + auto weights_md = onednn::layout_to_memory_desc(weights_layout, dnnl::memory::format_tag::ba); + auto output_md = onednn::layout_to_memory_desc(output_layout, dnnl::memory::format_tag::ab, false); + + if (has_bias) { + auto bias_md = onednn::layout_to_memory_desc(impl_params.get_input_layout(2), dnnl::memory::format_tag::ab, false); + return std::make_shared( + engine.get_onednn_engine(), + input_md, + weights_md, + bias_md, + output_md, + attr); + } else { + return std::make_shared( + engine.get_onednn_engine(), + input_md, + weights_md, + output_md, + attr); + } + } + public: void save(BinaryOutputBuffer& ob) const override { #ifdef ONEDNN_PRIMITIVE_SERIALIZATION @@ -158,8 +220,10 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { auto prim = impl_params->typed_desc(); size_t input_size = prim->input_size; bool has_bias = !prim->bias.empty(); + bool is_compressed = prim->compressed_weights; ob << input_size; ob << has_bias; + ob << is_compressed; std::vector prim_cache; prim_cache = _prim.get_cache_blob(); @@ -173,12 +237,19 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { size_t input_size = 2; bool has_bias = false; + bool is_compressed = false; ib >> input_size; ib >> has_bias; + ib >> is_compressed; const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernelImplParams()); - auto prim_desc = get_fully_connected_primitive_descriptor(*impl_params, ib.get_engine(), input_size, has_bias, *_attrs); - _pd = *prim_desc; + if (is_compressed) { + auto prim_desc = get_matmul_primitive_descriptor(*impl_params, ib.get_engine(), input_size, has_bias, *_attrs); + _pd = *prim_desc; + } else { + auto prim_desc = get_inner_product_primitive_descriptor(*impl_params, ib.get_engine(), input_size, has_bias, *_attrs); + _pd = *prim_desc; + } std::vector prim_cache; ib >> prim_cache; @@ -194,10 +265,35 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { auto& config = impl_params.prog->get_config(); auto attr = arg.get_onednn_primitive_attributes(); auto prim = impl_params.typed_desc(); - auto prim_desc = get_fully_connected_primitive_descriptor(impl_params, impl_params.prog->get_engine(), - prim->input_size, !prim->bias.empty(), *attr); - return cldnn::make_unique(engine, config, attr, *prim_desc, get_weights_reorder(impl_params, *prim_desc)); + // There may be a performance difference between InnerProduct and MatMul primitives in oneDNN, + // so use MatMul only for weights compression and IP for all other cases. 
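+ // Decompression semantics assumed here from the oneDNN docs (applied per output channel): + // w_decompressed = (w_8bit - zero_point) * scale, + // i.e. the weights stay 8-bit in memory and are expanded on the fly inside the MatMul kernel.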
+ if (prim->compressed_weights) { + attr->set_fpmath_mode(dnnl::fpmath_mode::f16, true); + if (!prim->decompression_scale.empty()) { + auto decompression_scale_idx = !arg.bias_term() ? 2 : 3; + auto data_type = convert_data_type(arg.get_dependency(decompression_scale_idx).get_output_layout().data_type); + attr->set_scales(DNNL_ARG_WEIGHTS, 1 << 1, dnnl::memory::dims{}, data_type); + } + + if (prim->decompression_zero_point_scalar.has_value()) { + OPENVINO_ASSERT(false, "[GPU] OneDNN can't use scalar as a zero point value\n"); + } else if (!prim->decompression_zero_point.empty()) { + auto decompression_zp_idx = !arg.bias_term() ? 3 : 4; + auto data_type = convert_data_type(arg.get_dependency(decompression_zp_idx).get_output_layout().data_type); + attr->set_zero_points(DNNL_ARG_WEIGHTS, 1 << 1, dnnl::memory::dims{}, data_type); + } + + auto prim_desc = get_matmul_primitive_descriptor(impl_params, impl_params.prog->get_engine(), + prim->input_size, !prim->bias.empty(), *attr); + + return cldnn::make_unique<fully_connected_onednn>(engine, config, attr, *prim_desc); + } else { + auto prim_desc = get_inner_product_primitive_descriptor(impl_params, impl_params.prog->get_engine(), + prim->input_size, !prim->bias.empty(), *attr); + + return cldnn::make_unique<fully_connected_onednn>(engine, config, attr, *prim_desc, get_weights_reorder(impl_params, *prim_desc)); + } } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index 38e8a5097bb666..0d6c00636c8fb6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -242,6 +242,9 @@ dnnl::memory::desc layout_to_memory_desc(cldnn::layout l, dnnl::memory::format_t } else if (target_fmt == dnnl::memory::format_tag::ab) { dims.push_back(l.batch()); dims.push_back(l.get_tensor().count() / l.batch()); + } else if (target_fmt == dnnl::memory::format_tag::ba) { + dims.push_back(l.feature()); + dims.push_back(l.get_tensor().count() / l.feature()); } else if (flatten) { dims = flatten_tensor(l.get_tensor()); } else { diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 6054281a9b3658..0ce52b7e1a3d36 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -895,12 +895,25 @@ static bool is_node_for_onednn(deconvolution_node const& node) static bool is_node_for_onednn(fully_connected_node const& node) { - if (!layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node)) - return false; - auto fc_prim = node.get_primitive(); - // onednn impl doesn't support compressed weights for now - if (fc_prim->compressed_weights) + + if (fc_prim->compressed_weights) { + auto weights_dt = node.weights().get_output_layout().data_type; + if (ov::element::Type(weights_dt).bitwidth() != 8) + return false; + + if (fc_prim->decompression_zero_point_scalar.has_value()) + return false; + + if (!fc_prim->decompression_zero_point.empty()) { + auto decompression_zp_idx = fc_prim->bias.empty() ?
3 : 4; + auto decompression_zp_dt = node.get_input_layout(decompression_zp_idx).data_type; + if (weights_dt != decompression_zp_dt) + return false; + } + } + + if (!layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node)) return false; auto output_layout = node.get_output_layout(); @@ -1332,8 +1345,16 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) { return onednn_check_data_types_for_deconvolution(in_dt, wei_dt, out_dt); } else if (node.is_type() || node.is_type()) { bool is_fc = node.is_type(); - auto wei_dt = is_fc ? node.as().weights().get_output_layout(false).data_type : - node.as().get_input_layout(1).data_type; + data_types wei_dt; + if (is_fc) { + const auto& fc_node = node.as(); + const auto fc_prim = fc_node.get_primitive(); + wei_dt = fc_node.weights().get_output_layout(false).data_type; + if (fc_prim->compressed_weights && ov::element::Type(wei_dt).bitwidth() == 8) + return true; + } else { + wei_dt = node.as().get_input_layout(1).data_type; + } return onednn_check_data_types_for_fc_gemm(in_dt, wei_dt, out_dt); } else if (node.is_type()) { auto input_fmt = node.get_input_layout(0).format; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 8445d91c271bf8..7460a9599ab0d9 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1676,6 +1676,8 @@ event::ptr primitive_inst::update_weights() { // incorrect memory buffer may be assigned, so reset cached weights for such case _reordered_weights_cache.add(original_layout, original_weights_memory); _impl_params->weights_layout = optional_layout(original_layout); + GPU_DEBUG_TRACE_DETAIL << id() << ": add original weights memory " << original_layout.to_short_string() << " to weights cache; " + << "cache_size=" << _reordered_weights_cache.size() << "/" << _reordered_weights_cache.capacity() << std::endl; } else { auto expected_layout = reorder_kernel_params->get_output_layout(); // Set original partial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl index 10f61b1dd15393..e9079c6fb395f3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl @@ -275,10 +275,10 @@ KERNEL(gemm_tiled_opt)( else #endif // INDIRECT_INPUT1 { - #if TILE_N_NOT_DIVISIBLE - b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; - #else + #if N_IS_ALIGNED_4BYTE b_tile[b_load_id] = BLOCK_READ_B(b_ptr, 0); + #else + b_tile[b_load_id] = b_raw_global_id > N - 1 ? 
0 : b_ptr[sglid]; #endif b_ptr += input1_offset; } @@ -340,11 +340,11 @@ KERNEL(gemm_tiled_opt)( #if INDIRECT_INPUT0 uint a_idx = FUNC_CALL(get_input0_indirect_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (k * TILE_K + sglid), beam_table); A_FLOATN a_read = input0[a_idx]; -#elif TILE_K_NOT_DIVISIBLE - A_FLOATN a_read = a_ptr[sglid]; -#else // TILE_K_NOT_DIVISIBLE +#elif K_IS_ALIGNED_4BYTE A_FLOATN a_read = BLOCK_READ_A(a_ptr, 0); -#endif // TILE_K_NOT_DIVISIBLE +#else // K_IS_ALIGNED_4BYTE + A_FLOATN a_read = a_ptr[sglid]; +#endif // K_IS_ALIGNED_4BYTE #endif // IS_DYNAMIC a_ptr += input0_offset; @@ -486,11 +486,11 @@ KERNEL(gemm_tiled_opt)( else #endif { - #if TILE_N_NOT_DIVISIBLE - b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; - #else // TILE_N_NOT_DIVISIBLE + #if N_IS_ALIGNED_4BYTE b_tile[b_load_id] = BLOCK_READ_B(b_ptr, 0); - #endif // TILE_N_NOT_DIVISIBLE + #else // N_IS_ALIGNED_4BYTE + b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; + #endif // N_IS_ALIGNED_4BYTE b_ptr += input1_offset; } #elif TRANSPOSE_INPUT1 == TRANSPOSE_OTHER // TRANSPOSE_INPUT1 == 0 @@ -529,15 +529,23 @@ KERNEL(gemm_tiled_opt)( } #endif // TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST +#if !INDIRECT_INPUT0 && K_IS_ALIGNED_4BYTE && (TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST) + a_ptr = input0 + FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, (K_FULL_ITERATIONS * TILE_K)); +#endif // Loading leftovers of the matrix A and tile C calculation unroll_for (uint dot_id = 0; dot_id < tile_m_iterations; dot_id++) { #if INDIRECT_INPUT0 uint a_idx = FUNC_CALL(get_input0_indirect_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (K_FULL_ITERATIONS * TILE_K + sglid), beam_table); + INPUT0_TYPE a_read = input0[a_idx]; +#else // INDIRECT_INPUT0 +#if K_IS_ALIGNED_4BYTE && (TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST) + INPUT0_TYPE a_read = BLOCK_READ_A(a_ptr, 0); + a_ptr += input0_offset; #else uint a_idx = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (K_FULL_ITERATIONS * TILE_K + sglid)); -#endif INPUT0_TYPE a_read = input0[a_idx]; - +#endif +#endif // INDIRECT_INPUT0 unroll_for (uint simd_id = 0; simd_id < TILE_K_LEFTOVER; simd_id++) { c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_id)), b_tile[simd_id], c_tile[dot_id]); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 7c6ace7c20c7de..f39f7deaec6614 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -178,12 +178,13 @@ bool TuneParamsSelector::VerifyTuneParams(const fully_connected_params& params, return false; if (tparams.kernel_type == FullyConnected_bf_tiled::KernelType::SLM) { + bool is_i4_u4 = (params.weights.GetDType() == WeightsType::INT4 || params.weights.GetDType() == WeightsType::UINT4); const auto required_batch_alignment = 64; if (!params.is_shape_agnostic && (!IsAligned(output_b, required_batch_alignment) || output_b < 256)) return false; const auto required_tile_b = 8; - if (tparams.tile_b != required_tile_b) + if ((tparams.tile_b != required_tile_b) && !is_i4_u4) return false; const auto required_tile_ofm = 2; @@ -248,6 +249,10 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& 
params, } else { // Try to use SLM kernels if possible if (preferred_kernel_type != KernelType::DEFAULT) { + if (params.is_shape_agnostic) { + selector.Case(tune_params(16, 2, 2, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) + .Case(tune_params(16, 2, 1, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); + } selector.Case(tune_params(8, 2, 2, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) .Case(tune_params(8, 2, 1, 4, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp index 76d73acf501d05..acdd3bf84e5224 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp @@ -253,7 +253,7 @@ JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const if (!dyn_gather_idx_dim) jit.AddConstant(MakeJitConstant("AXIS_DIM", GetGatherMaxIndexDim(params))); - if (params.is_shape_agnostic) + if (params.is_shape_agnostic && params.inputs[0].is_dynamic()) jit.AddConstant(MakeJitConstant("GATHER_AXIS_SHAPE_INFO_INDEX", GetGatherAxisIndexInShapeInfo(params))); if (!params.fused_ops.empty()) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp index 56a97df8d0d0ab..eb3b9d2d8a1787 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -52,6 +52,7 @@ GemmKernelBase::DispatchData GemmKernelTiledOpt::SetDefault(const gemm_params& p (GetOuputSize(params.output_order, output, 'X') * GetOuputSize(params.output_order, output, 'Y')); std::vector<size_t> global = { GetOuputSize(params.output_order, output, 'X'), GetOuputSize(params.output_order, output, 'Y'), total_batches }; + GPU_DEBUG_TRACE_DETAIL << "Draft for global work item size: [" << global[0] << ", " << global[1] << ", " << global[2] << "], " << std::endl; dispatchData.gws[0] = Align(global[0], td.tile_n_size) / (td.tile_n_size / td.simd_size); dispatchData.gws[1] = Align(global[1], td.tile_m_size) / td.tile_m_size; @@ -94,6 +95,11 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem tuning_data.tile_k_size = tuning_data.simd_size; tuning_data.tile_m_size = tuning_data.simd_size; } + // Increasing tile_n_size improves performance when m_size and n_size are sufficiently large and n_size is a multiple of 32.
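+ // For example, with simd_size = 16 a tile_n_size of 32 makes each sub-group cover 32 output columns instead of 16, + // so every broadcast A element is reused across twice as many outputs; requiring n_size % 32 == 0 keeps the last + // N-tile full, so no extra leftover handling is needed on this path (reasoning inferred from the tiling scheme above).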
+ if (m_size >= 128 && n_size >= 128 && (n_size % 32 == 0) && tuning_data.simd_size == 16 && params.fused_ops.empty()) + tuning_data.tile_n_size = 32; + + GPU_DEBUG_LOG << params.layerID << ": m_size: " << m_size << ", n_size: " << n_size << ", k_size: " << k_size << std::endl; } else { // In shape agnostic kernel case, the vector size of FusedOpsConfiguration cannot be specified at build time, // so the tile sizes must be the same as simd_size @@ -103,6 +109,11 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem tuning_data.tile_m_size = tuning_data.simd_size; } + GPU_DEBUG_LOG << params.layerID << ": tile_m_size: " << tuning_data.tile_m_size + << ", tile_n_size: " << tuning_data.tile_n_size + << ", tile_k_size: " << tuning_data.tile_k_size + << ", simd_size: " << tuning_data.simd_size << std::endl; + return tuning_data; } @@ -212,6 +223,8 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons auto leftover_m = m_size % tuning_data.tile_m_size; auto leftover_n = n_size % tuning_data.tile_n_size; auto leftover_k = k_size % tuning_data.tile_k_size; + auto n_aligned_4byte = (n_size * BytesPerElement(params.inputs[0].GetDType())) % 4 == 0; + auto k_aligned_4byte = (k_size * BytesPerElement(params.inputs[0].GetDType())) % 4 == 0; jit.AddConstants({ MakeJitConstant("M", m_size), @@ -219,6 +232,8 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons MakeJitConstant("N", n_size), MakeJitConstant("K_PADDED_IN0", k_size), MakeJitConstant("N_PADDED", n_size), + MakeJitConstant("K_IS_ALIGNED_4BYTE", k_aligned_4byte), + MakeJitConstant("N_IS_ALIGNED_4BYTE", n_aligned_4byte), MakeJitConstant("SIMD_WIDTH", tuning_data.simd_size), MakeJitConstant("TILE_M", tuning_data.tile_m_size), MakeJitConstant("TILE_K", tuning_data.tile_k_size), diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index f1561c52cb495e..e8ea3c6af3df32 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -44,7 +44,10 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto convert_m = wrap_type({weights_m}); auto sub_const_m = wrap_type(consumers_count(1)); - auto subtract_m = wrap_type({convert_m, sub_const_m}); + auto sub_convert_const_m = wrap_type({sub_const_m}); + auto sub_with_convert_m = wrap_type({convert_m, sub_convert_const_m}); + auto sub_no_convert_m = wrap_type({convert_m, sub_const_m}); + auto subtract_m = std::make_shared(OutputVector{sub_with_convert_m, sub_no_convert_m}); auto mul_const_m = wrap_type(consumers_count(1)); auto mul_with_sub_m = wrap_type({subtract_m, mul_const_m}); @@ -97,7 +100,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon const auto& scale = reshape_const_to_2d(pattern_map.at(mul_const_m).get_node_shared_ptr()); std::shared_ptr optional_zero_point = nullptr; - const bool with_zero_point = pattern_map.count(subtract_m) > 0; + const bool with_zero_point = pattern_map.count(sub_no_convert_m) > 0 || pattern_map.count(sub_with_convert_m) > 0; if (with_zero_point) { optional_zero_point = reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr()); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 
ee94acfbd95345..03c5118a7a8861 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -276,7 +276,16 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) { return !is_type(next_node); }); - manager.register_pass(ov::element::TypeVector{ov::element::u8, ov::element::u4, ov::element::i4}, true); + // Disable subtract folding only for the dGPUs to meet the requirements of oneDNN: + // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression + // types are not supported by oneDNN) + if (device_info.supports_immad) { + manager.register_pass(ov::element::TypeVector{ov::element::u8}, false); + manager.register_pass(ov::element::TypeVector{ov::element::u4, ov::element::i4}, true); + } else { + manager.register_pass(ov::element::TypeVector{ov::element::u8, ov::element::u4, ov::element::i4}, true); + } + // Need to check if transformations work correctly for mixed models with both compression and quantization at the same time. if (!is_model_quantized) pass_config->set_callback(is_non_supported_decompression_op); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4928e6ef78e7bc..24a8b4854ad623 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -84,7 +84,6 @@ std::vector<std::string> disabledTestPatterns() { // Issue: 129991 R"(.*StridedSliceLayerTest.*TS=.*2.2.4.1*.*)", // Issue: CVS-133173 - R"(.*smoke_GatherCompressedWeights_basic/GatherWeightsDecompression.Inference/data_shape=\[15,32\]_indices_shape=\[\?.\?\]_\[2.3\].*output_precision=f32.*)", R"(.*smoke_CTCLoss_Set2/CTCLossLayerTest.Inference/IS=\(\[\]\)_TS=\{\(3.6.8\)\}_LL=\(6.5.6\)_A=\(4.1.2.3.4.5\)\(5.4.3.0.1.0\)\(2.1.3.1.3.0\)_AL=\(3.3.5\)_BI=7_PCR=1_CMR=1_U=0_PF=f32_PI=i64.*)", R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/f16_GPU_\[4,3,50,86\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)", R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/(f32|f16)_GPU_\[4,3,50,86\]_level=256_shape=\[1,3,1,1\]_input_low=\{ 0, 0, 0 \}_input_high=\{ 255, 127.5, 85 \}_output_low=\{ 0, 0, 0 \}_output_high\{ 255, 127.5, 85 \}_precision=.*)", diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index a1d517554d9c28..80026e38372600 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1200,6 +1200,109 @@ class fully_connected_gpu_tests: public ::testing::Test { } } + void test_compressed_int8_scale(bool is_caching_test, bool is_dynamic, int64_t batch_num, bool use_bias = false, bool use_zp = false, bool is_3d = false) { + tests::random_generator rg(GET_SUITE_NAME); + auto& engine = get_test_engine(); + + int64_t ifm_num = 33; + int64_t ofm_num = 65; + + auto in_shape = is_3d ? ov::PartialShape({batch_num, 1, ifm_num}) : ov::PartialShape({batch_num, ifm_num}); + auto bias_shape = is_3d ?
ov::PartialShape({1, 1, ofm_num}) : ov::PartialShape({1, ofm_num}); + auto input_mem = engine.allocate_memory({ in_shape, data_types::f16, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {ofm_num, ifm_num}, data_types::u8, format::bfyx }); + auto bias_mem = engine.allocate_memory({ bias_shape, data_types::f16, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {ofm_num, 1}, data_types::f16, format::bfyx }); + auto zp_mem = engine.allocate_memory({ {ofm_num, 1}, data_types::u8, format::bfyx }); + + auto input_data = rg.generate_random_1d<ov::float16>(batch_num * ifm_num, -1.0f, 1.0f); + set_values(input_mem, input_data); + + auto weights_data = rg.generate_random_1d<uint8_t>(ofm_num * ifm_num, 0, 10); + set_values(weights_mem, weights_data); + + auto bias_data = rg.generate_random_1d<ov::float16>(ofm_num, -2.0f, 2.0f); + set_values(bias_mem, bias_data); + + auto scale_data = rg.generate_random_1d<ov::float16>(ofm_num, -1.0f, 1.0f); + set_values(scale_mem, scale_data); + + auto zp_data = rg.generate_random_1d<uint8_t>(ofm_num, 0, 4); + set_values(zp_mem, zp_data); + + auto in_partial_shape = is_3d ? ov::PartialShape({-1, -1, ifm_num}) : ov::PartialShape({-1, ifm_num}); + auto in_layout = is_dynamic ? layout{ in_partial_shape, data_types::f16, format::bfyx } + : layout{ {batch_num, ifm_num}, data_types::f16, format::bfyx }; + + auto bias_id = use_bias ? "bias" : ""; + auto zp_id = use_zp ? "zp" : ""; + + auto fc_prim = fully_connected("fc_prim", input_info("input"), + "weights", bias_id, + "scale", zp_id, + data_types::f16, + padding(), + in_shape.size(), 2); + + auto get_ref_results = [&]() { + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + topology topology( + input_layout("input", in_layout), + data("weights", weights_mem), + data("scale", scale_mem), + data("zp", zp_mem), + data("bias", bias_mem), + fc_prim + ); + + network network(engine, topology, config); + network.set_input_data("input", input_mem); + + auto outputs = network.execute(); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(outputs.begin()->first == "fc_prim"); + + auto output_layout = outputs.begin()->second.get_layout(); + auto output_mem = outputs.begin()->second.get_memory(); + + return engine.reinterpret_buffer(*output_mem, output_layout); + }; + + topology topology( + input_layout("input", in_layout), + data("weights", weights_mem), + data("scale", scale_mem), + data("zp", zp_mem), + data("bias", bias_mem), + fc_prim + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input_mem); + + auto outputs = network->execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc_prim"); + + auto output_mem = outputs.begin()->second.get_memory(); + cldnn::mem_lock<ov::float16> output_ptr(output_mem, get_test_stream()); + + auto ref_output_mem = get_ref_results(); + cldnn::mem_lock<ov::float16> output_ptr_ref(ref_output_mem, get_test_stream()); + + const float threshold_fp16 = 1e-1; + for (size_t i = 0; i < output_ptr_ref.size(); i++) { + ASSERT_NEAR(output_ptr_ref[i], output_ptr[i], threshold_fp16) << "i = " << i; + } + } + void test_compressed_scale_bias(bool is_caching_test) { auto& engine = get_test_engine(); @@ -2764,6 +2867,30 @@ TEST_F(fully_connected_gpu_tests, compressed_scale_fp16_cached) {
this->test_compressed_scale_fp16(false); } +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_b1) { + this->test_compressed_int8_scale(false, true, 1, false, false); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_b1_bias) { + this->test_compressed_int8_scale(false, true, 1, true, false); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_b1_bias_zp_3d) { + this->test_compressed_int8_scale(false, true, 1, true, true, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_zp_b1) { + this->test_compressed_int8_scale(false, true, 1, false, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_zp_b13) { + this->test_compressed_int8_scale(false, true, 13, false, true); +} + +TEST_F(fully_connected_gpu_tests, compressed_int8_scale_zp_b12_3d) { + this->test_compressed_int8_scale(false, true, 12, false, true, true); +} + TEST_F(fully_connected_gpu_tests, dynamic) { this->test_dynamic(false); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp index a4543197bcbd0a..37a1ba8b982414 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp @@ -533,3 +533,42 @@ TEST(non_zero_gpu, const_input) { ASSERT_FLOAT_EQ(output_ptr[i], out_data[i]); } } + +TEST(non_zero_gpu, empty_input) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape{1, -1}, data_types::f32, format::bfyx}; + auto in_data_layout = layout{ov::PartialShape{1, 0}, data_types::f32, format::bfyx}; + auto input_data_mem = engine.allocate_memory(in_data_layout); + + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(count_nonzero("count_nonzero", input_info("input"))); + topology.add(gather_nonzero("gather_nonzero", input_info("input"), input_info("count_nonzero"))); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network net(engine, topology, config); + + net.set_input_data("input", input_data_mem); + + auto count_nonzero_inst = net.get_primitive("count_nonzero"); + + // Put some value into the output buffer to ensure that it's not empty + // This is needed to check that the implementation correctly handles the case when the input tensor is empty and sets the non-zero count to 0 + count_nonzero_inst->output_memory(0).fill(engine.get_service_stream(), 1); + engine.get_service_stream().finish(); + + auto count_nonzero_impl = count_nonzero_inst->get_impl(); + ASSERT_TRUE(count_nonzero_impl != nullptr); + + auto gather_nonzero_inst = net.get_primitive("gather_nonzero"); + auto gather_nonzero_impl = gather_nonzero_inst->get_impl(); + ASSERT_TRUE(gather_nonzero_impl != nullptr); + ASSERT_TRUE(gather_nonzero_impl->is_dynamic()); + + auto outputs = net.execute(); + + auto output = outputs.at("gather_nonzero").get_memory(); + ASSERT_EQ(output, nullptr); +} diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp index 1c7ebe72990ae4..12398c8221f4b7 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp @@ -401,6 +401,34 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { } }
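+// A schematic sketch of the pattern exercised by the new case below (the exact
+// matcher and replacement op names are assumptions inferred from the
+// surrounding tests, not taken from this patch):
+//
+//   Constant(u8 weights) -> Convert(f16) ----.
+//   Constant(u8 zp)      -> Convert(f16) ----+-> Subtract -> Multiply(f16 scale) -> FullyConnected
+//
+// which the pass is expected to collapse into a single compressed FC op that
+// keeps both the weights and the zero point in u8, matching the oneDNN
+// requirement that weights and zero points share the same data type.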
+TEST_F(TransformationTestsF, ConvertFCToCompressed9) { + { + auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f16); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 1 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f16); + auto sub = std::make_shared(convert, zp_convert); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 1 }, { 1 }); + auto no_bias = std::make_shared(); + auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + } // namespace intel_gpu } // namespace test } // namespace ov diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index b7929efacd57b6..5b08fe0c476704 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -112,7 +112,7 @@ if(ENABLE_ONEDNN_FOR_GPU) "-DDNNL_LIBRARY_NAME=${DNNL_GPU_LIBRARY_NAME}" "-DCMAKE_INSTALL_PREFIX=${ONEDNN_INSTALL_DIR}" "-DDNNL_ENABLE_CONCURRENT_EXEC=ON" - "-DDNNL_ENABLE_PRIMITIVE_CACHE=OFF" + "-DDNNL_ENABLE_PRIMITIVE_CACHE=ON" "-DDNNL_ENABLE_WORKLOAD=INFERENCE" "-DDNNL_ENABLE_JIT_PROFILING=${BUILD_SHARED_LIBS}" "-DDNNL_ENABLE_ITT_TASKS=${BUILD_SHARED_LIBS}" diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index 494af5f9921bda..26c5598cccbc14 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit 494af5f9921bdae98f1a0e2955fa7d76ff386c4f +Subproject commit 26c5598cccbc144ff49255a0b44f00cb9b19e6f3 diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt index e51aed2fc05f4c..af5c0ce5e5f5b5 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt @@ -1 +1 @@ -AUTO_BATCH_DEVICE_CONFIG TEMPLATE \ No newline at end of file +MULTI_DEVICE_PRIORITIES TEMPLATE \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt index 4c8a8807b6aba4..af5c0ce5e5f5b5 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt +++ 
b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt @@ -1 +1 @@ -TARGET_FALLBACK TEMPLATE \ No newline at end of file +MULTI_DEVICE_PRIORITIES TEMPLATE \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp index 1943db27f9285f..c9cffe7c8139c8 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp @@ -139,7 +139,7 @@ void ReadIRTest::SetUp() { if (!in_info.is_const) { continue; } - ov::test::utils::set_const_ranges(in_info.ranges.min, in_info.ranges.max); + utils::ConstRanges::set(in_info.ranges.min, in_info.ranges.max); // auto next_node = param->get_default_output().get_node_shared_ptr(); auto next_node = param->get_default_output().get_target_inputs().begin()->get_node()->shared_from_this(); auto it = inputMap.find(next_node->get_type_info()); @@ -148,7 +148,7 @@ void ReadIRTest::SetUp() { const_node->set_friendly_name(param->get_friendly_name()); ov::replace_node(param, const_node); parameter_to_remove.push_back(param); - ov::test::utils::reset_const_ranges(); + utils::ConstRanges::reset(); } for (const auto& param : parameter_to_remove) { function->remove_parameter(param); diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp index 6e1af8f967a6b8..ff1e4fd2fb000b 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/generate_inputs.hpp @@ -12,9 +12,6 @@ namespace ov { namespace test { namespace utils { -void set_const_ranges(double _min, double _max); -void reset_const_ranges(); - std::vector color_test_image(size_t height, size_t width, int b_step, ov::preprocess::ColorFormat format); using InputsMap = std::map color_test_image(size_t height, size_t width, int b_step, ov::preprocess::ColorFormat format) { // Test all possible r/g/b values within dimensions int b_dim = 255 / b_step + 1; @@ -113,16 +95,6 @@ ov::Tensor generate(const std::shared_ptr& node, const ov::Shape& targetShape) { InputGenerateData inGenData; - if (const_range.is_defined) { - auto min_orig = inGenData.start_from; - auto max_orig = inGenData.start_from + inGenData.range * inGenData.resolution; - auto min_ref = const_range.min; - auto max_ref = const_range.max; - if (min_orig < min_ref || min_orig == 0) - inGenData.start_from = min_ref; - inGenData.range = (max_orig > max_ref || max_orig == 10 ? 
max_ref : max_orig - inGenData.start_from) - inGenData.start_from; - } - if (elemType.is_real()) { set_real_number_generation_data(inGenData); } diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp index aa36a4ab140b41..2b6da6fe54a776 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp @@ -9,6 +9,25 @@ namespace ov { namespace test { namespace utils { + +// TODO: remove this workaround for generating correct constant data (replacing a parameter with a const) in conformance tests with a defined range +struct ConstRanges { + static double max, min; + static bool is_defined; + + static void set(double _min, double _max) { + min = _min; + max = _max; + is_defined = true; + } + + static void reset() { + min = std::numeric_limits::max(); + max = std::numeric_limits::min(); + is_defined = false; + } +}; + struct InputGenerateData { double start_from = 0; uint32_t range = 10; @@ -19,7 +38,18 @@ struct InputGenerateData { : start_from(_start_from), range(_range), resolution(_resolution), - seed(_seed){}; + seed(_seed) { + if (ConstRanges::is_defined) { + auto min_orig = start_from; + auto max_orig = start_from + range * resolution; + auto min_ref = ConstRanges::min; + auto max_ref = ConstRanges::max; + if (min_orig < min_ref || min_orig == 0) + start_from = min_ref; + range = + (uint32_t)round((max_orig > max_ref || max_orig == 10 ? max_ref : max_orig - start_from) - start_from); + } + }; }; ov::Tensor create_and_fill_tensor(const ov::element::Type element_type, diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp index 70c3dbc26fd939..262ceb30187137 100644 --- a/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp +++ b/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp @@ -9,7 +9,6 @@ #include "openvino/op/bitwise_not.hpp" #include "openvino/op/bitwise_or.hpp" #include "openvino/op/bitwise_xor.hpp" -#include "openvino/op/convert.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/erf.hpp" #include "openvino/op/floor_mod.hpp" diff --git a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp index 435dbf63257efd..78bf8e02810874 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp @@ -12,6 +12,11 @@ namespace ov { namespace test { namespace utils { + +double ConstRanges::max = std::numeric_limits::min(); +double ConstRanges::min = std::numeric_limits::max(); +bool ConstRanges::is_defined = false; + ov::Tensor create_and_fill_tensor(const ov::element::Type element_type, const ov::Shape& shape, const InputGenerateData& inGenData) { diff --git a/tests/constraints.txt b/tests/constraints.txt index 1492e0863cac1a..f3569f126cc3fb 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -8,11 +8,12 @@ pymongo>=3.12.0 PyYAML>=5.4.1 scipy>=1.7; python_version <= "3.8" scipy>=1.11.1; python_version >= "3.9" +mpmath<1.4 sympy>=1.10 wheel>=0.38.1 defusedxml>=0.7.1 fastjsonschema~=2.17.1 -tensorflow>=2.5,<2.16.0 +tensorflow>=2.5,<2.17.0 test-generator==0.1.2 requests>=2.25.1 opencv-python>=4.5 diff --git a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py
b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py index 5f750896963935..4bb291c9d51178 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py @@ -479,11 +479,11 @@ def test_pytorch_decoder_can_convert_empty_list(): class aten_roll(torch.nn.Module): def __init__(self, shifts): super(aten_roll, self).__init__() - self.shits = shifts + self.shifts = shifts def forward(self, x): # roll has optional input dim, which is empty int list by default - return torch.roll(x, self.shits) + return torch.roll(x, self.shifts) model = get_scripted_model(aten_roll(1)) consts = [n for n in model.inlined_graph.nodes() if n.kind() == diff --git a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py index d3b5153da24be4..98369f2953a03e 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py @@ -260,6 +260,7 @@ def forward(self, inp): assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ "Parameter", "CustomElu", "Result"] + def test_framework_map_macros(): from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder @@ -291,17 +292,18 @@ def forward(self, x): "Parameter", "ReluCustom", "Result"] +class CosModel(torch.nn.Module): + def __init__(self): + super(CosModel, self).__init__() + + def forward(self, x): + return torch.cos(x.to(torch.float32)) + + def test_op_extension(): from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder from openvino.frontend.pytorch import OpExtension - class CosModel(torch.nn.Module): - def __init__(self): - super(CosModel, self).__init__() - - def forward(self, x): - return torch.cos(x.to(torch.float32)) - model = CosModel() decoder = TorchScriptPythonDecoder(get_scripted_model(model)) @@ -327,13 +329,6 @@ def test_op_extension_generic(): from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder from openvino.frontend import OpExtension - class CosModel(torch.nn.Module): - def __init__(self): - super(CosModel, self).__init__() - - def forward(self, x): - return torch.cos(x.to(torch.float32)) - model = CosModel() decoder = TorchScriptPythonDecoder(get_scripted_model(model)) @@ -355,6 +350,97 @@ def forward(self, x): "Parameter", "Convert", "Sin", "Result"] +def test_module_extension(): + from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder + from openvino.frontend.pytorch import ModuleExtension, ConversionExtension + from openvino import convert_model + + class ModelWithModule(torch.nn.Module): + def __init__(self): + super(ModelWithModule, self).__init__() + self.cos_module = CosModel() + + def forward(self, x): + return self.cos_module(x) + + model = ModelWithModule() + decoder = TorchScriptPythonDecoder(model) + + fem = FrontEndManager() + fe = fem.load_by_framework(framework="pytorch") + assert fe + + input_model = fe.load(decoder) + assert input_model + converted_model = fe.convert(input_model) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Convert", "Cos", "Result"] + + converted_model = convert_model(model, example_input=( + torch.randn(100),), extension=[ModuleExtension(CosModel, "aten::sin")]) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Sin", "Result"] + + converted_model = convert_model(model, 
example_input=(torch.randn( + 100),), extension=[ModuleExtension(model.cos_module, "aten::sin")]) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Sin", "Result"] + + converted_model = convert_model(model, example_input=(torch.randn( + 100),), extension=[ModuleExtension("cos_module", "aten::sin")]) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Sin", "Result"] + + def sin_op(context): + return ops.sin(context.get_input(0)).outputs() + + converted_model = convert_model(model, example_input=(torch.randn(100),), extension=[ + ModuleExtension("cos_module", "MyOp"), ConversionExtension("MyOp", sin_op)]) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Sin", "Result"] + + +def test_multiple_module_extension(): + from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder + from openvino.frontend.pytorch import ModuleExtension + from openvino import convert_model + + class ModelWithModule(torch.nn.Module): + def __init__(self): + super(ModelWithModule, self).__init__() + self.cos_module = CosModel() + self.relu_module = torch.nn.ReLU() + + def forward(self, x): + x = x.to(torch.float32) + return self.cos_module(x) + self.relu_module(x) + + model = ModelWithModule() + decoder = TorchScriptPythonDecoder(model) + + fem = FrontEndManager() + fe = fem.load_by_framework(framework="pytorch") + assert fe + + input_model = fe.load(decoder) + assert input_model + converted_model = fe.convert(input_model) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Convert", "Convert", "Cos", "Constant", "Relu", "Multiply", "Add", "Result"] + + converted_model = convert_model(model, example_input=( + torch.randn(100),), extension=[ModuleExtension(CosModel, "aten::sin"), ModuleExtension(model.relu_module, "aten::tan")]) + assert converted_model + assert [n.get_type_name() for n in converted_model.get_ordered_ops()] == [ + "Parameter", "Sin", "Tan", "Add", "Result"] + + def test_pytorch_telemetry(): from openvino.frontend import TelemetryExtension from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder @@ -500,25 +586,31 @@ def forward(self, x: float, y: torch.Tensor): r_t = "t" if isinstance(l_type, type): - ov_lhs = ops.parameter(PartialShape([]), pt_to_ov_type_map.get(l_type.__name__)) + ov_lhs = ops.parameter(PartialShape( + []), pt_to_ov_type_map.get(l_type.__name__)) pt_lhs = l_type(5) l_t = l_type.__name__ elif l_scalar: - ov_lhs = ops.parameter(PartialShape([]), pt_to_ov_type_map.get(str(l_type))) + ov_lhs = ops.parameter(PartialShape( + []), pt_to_ov_type_map.get(str(l_type))) pt_lhs = torch.tensor(1, dtype=l_type) else: - ov_lhs = ops.parameter(PartialShape([2, 2]), pt_to_ov_type_map.get(str(l_type))) + ov_lhs = ops.parameter(PartialShape( + [2, 2]), pt_to_ov_type_map.get(str(l_type))) pt_lhs = torch.rand([2, 2]).to(dtype=l_type) if isinstance(r_type, type): - ov_rhs = ops.parameter(PartialShape([]), pt_to_ov_type_map.get(r_type.__name__)) + ov_rhs = ops.parameter(PartialShape( + []), pt_to_ov_type_map.get(r_type.__name__)) pt_rhs = r_type(5) r_t = r_type.__name__ elif r_scalar: - ov_rhs = ops.parameter(PartialShape([]), pt_to_ov_type_map.get(str(r_type))) + ov_rhs = ops.parameter(PartialShape( + []), pt_to_ov_type_map.get(str(r_type))) pt_rhs = torch.tensor(1, dtype=r_type) else: - ov_rhs = 
ops.parameter(PartialShape([2, 2]), pt_to_ov_type_map.get(str(r_type))) + ov_rhs = ops.parameter(PartialShape( + [2, 2]), pt_to_ov_type_map.get(str(r_type))) pt_rhs = torch.rand([2, 2]).to(dtype=r_type) model = get_scripted_model(locals().get(f"aten_add_{l_t}_{r_t}")()) decoder = TorchScriptPythonDecoder(model) @@ -542,12 +634,13 @@ def forward(self, x: float, y: torch.Tensor): pt_out_type = pt_to_ov_type_map.get(str(pt_out_type)) ov_out_type = om.get_output_element_type(0) if pt_out_type == Type.i64 and ov_out_type == Type.i32 and "int" in [l_t, r_t]: - pytest.xfail("Pytorch int-like scalar in OV is converted to i32 instead of i64, mismatch is expected.") + pytest.xfail( + "Pytorch int-like scalar in OV is converted to i32 instead of i64, mismatch is expected.") assert pt_out_type == ov_out_type assert PartialShape(pt_out_shape) == om.get_output_partial_shape(0) -class TestModel1(torch.nn.Module): +class ModelTest1(torch.nn.Module): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.pool = torch.nn.AdaptiveAvgPool2d(1) @@ -559,8 +652,7 @@ def forward(self, x): def test_output_dict_names(): from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder - input = torch.ones((1, 3, 224, 224)) - model = TestModel1() + model = ModelTest1() decoder = TorchScriptPythonDecoder( model, example_input=(torch.randn(1, 3, 224, 224),)) fe_manager = FrontEndManager() @@ -570,7 +662,7 @@ def test_output_dict_names(): assert om.outputs[0].any_name == "x1" and om.outputs[1].any_name == "x2", "Output dict names are not expected" -class TestModel2(torch.nn.Module): +class ModelTest2(torch.nn.Module): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.pool = torch.nn.AdaptiveAvgPool2d(1) @@ -582,8 +674,7 @@ def forward(self, x): def test_output_tuple_names(): from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder - input = torch.ones((1, 3, 224, 224)) - model = TestModel2() + model = ModelTest2() decoder = TorchScriptPythonDecoder( model, example_input=(torch.randn(1, 3, 224, 224),)) fe_manager = FrontEndManager() diff --git a/tests/layer_tests/pytorch_tests/test_addcmul.py b/tests/layer_tests/pytorch_tests/test_addcmul.py index 5dde37fb609812..9812bbb1b329c9 100644 --- a/tests/layer_tests/pytorch_tests/test_addcmul.py +++ b/tests/layer_tests/pytorch_tests/test_addcmul.py @@ -47,6 +47,7 @@ def forward(self, x, y, z): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_addcmul(self, input_type, value, ie_device, precision, ir_version): self.input_type = input_type self._test(*self.create_model(value), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_all.py b/tests/layer_tests/pytorch_tests/test_all.py index de91f90cb69d60..6e4b1c494302fc 100644 --- a/tests/layer_tests/pytorch_tests/test_all.py +++ b/tests/layer_tests/pytorch_tests/test_all.py @@ -77,6 +77,7 @@ def _prepare_input(self, out=False): @pytest.mark.parametrize("out", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend def test_all_noparams(self, input_shape, d_type, out, ie_device, precision, ir_version): if type(input_shape) is list: self.input_tensor = np.random.randint(0, 2, input_shape, dtype=d_type) @@ -104,6 +105,7 @@ def test_all_noparams(self, input_shape, d_type, out, ie_device, precision, ir_v @pytest.mark.parametrize("out", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + 
@pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64'), diff --git a/tests/layer_tests/pytorch_tests/test_any.py b/tests/layer_tests/pytorch_tests/test_any.py new file mode 100644 index 00000000000000..0bc0ca79a55d70 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_any.py @@ -0,0 +1,50 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestAny(PytorchLayerTest): + def _prepare_input(self): + import numpy as np + return ((np.random.randint(2, size=(3, 3, 10, 10)) > 0),) + + def create_model(self, dim=None, keep_dim=None): + + import torch + class aten_any(torch.nn.Module): + def __init__(self, dim=None, keep_dim=None): + super(aten_any, self).__init__() + + if dim is None: + self.forward = self.forward_default + else: + self.forward = self.forward_dim + self.dim = dim + self.keep_dim = keep_dim + + def forward_default(self, x): + return torch.any(x) + + def forward_dim(self, x): + return torch.any(x, dim=self.dim, keepdim=self.keep_dim) + + + ref_net = None + + return aten_any(dim, keep_dim), ref_net, "aten::any" + + + @pytest.mark.precommit_fx_backend + def test_any_default(self, ie_device, precision, ir_version): + self._test(*self.create_model(), + ie_device, precision, ir_version) + + @pytest.mark.parametrize(("dim", "keep_dim"), + [(0, False), (0, True), (-1, True)]) + @pytest.mark.precommit_fx_backend + def test_any_dim(self, dim, keep_dim, ie_device, precision, ir_version): + self._test(*self.create_model(dim, keep_dim), + ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_arange.py b/tests/layer_tests/pytorch_tests/test_arange.py index fb8cea4b14b9c1..d871b09c0e4686 100644 --- a/tests/layer_tests/pytorch_tests/test_arange.py +++ b/tests/layer_tests/pytorch_tests/test_arange.py @@ -109,6 +109,7 @@ def forward(self, x, y, z, d): @pytest.mark.nightly @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", [None, "float32", "float64", "int32", "int64", "int8", "uin8"]) @pytest.mark.parametrize("end", [1, 2, 3]) @pytest.mark.parametrize("use_out", [skip_if_export(True), False]) @@ -117,6 +118,7 @@ def test_arange_end_only(self, dtype, end, use_out, ie_device, precision, ir_ver kwargs_to_prepare_input={"end": end}) @pytest.mark.nightly + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", [None, "float32", "float64", "int32", "int64", "int8"]) @pytest.mark.parametrize("start,end", [(0, 1), (-1, 1), (1, 5), (0.5, 2.5)]) def test_arange_start_end(self, dtype, end, start, ie_device, precision, ir_version): @@ -125,6 +127,7 @@ def test_arange_start_end(self, dtype, end, start, ie_device, precision, ir_vers @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", [None, "float32", "float64", "int32", "int64", "int8"]) @pytest.mark.parametrize("start,end,step", [(0, 1, 1), (-2, 1, 1.25), (1, -5, -1), (1, 10, 2), (-1, -5, -2)]) def test_arange_start_end_step(self, dtype, end, start, step, ie_device, precision, ir_version): @@ -133,6 +136,7 @@ @pytest.mark.nightly @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64", "int8",
"uint8"]) @pytest.mark.parametrize("end", [1, 2, 3]) def test_arange_end_only_with_prim_dtype(self, dtype, end, ie_device, precision, ir_version): @@ -140,6 +144,7 @@ def test_arange_end_only_with_prim_dtype(self, dtype, end, ie_device, precision, kwargs_to_prepare_input={"end": end, "ref_dtype": dtype}) @pytest.mark.nightly + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64", "int8"]) @pytest.mark.parametrize("start,end", [(0, 1), (-1, 1), (1, 5), (0.5, 2.5)]) def test_arange_start_end_with_prim_dtype(self, dtype, end, start, ie_device, precision, ir_version): @@ -148,6 +153,7 @@ def test_arange_start_end_with_prim_dtype(self, dtype, end, start, ie_device, pr @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64", "int8"]) @pytest.mark.parametrize("start,end,step", [(0, 1, 1), (-2, 1, 1.25), (1, -5, -1), (1, 10, 2), (-1, -5, -2)]) def test_arange_start_end_step_with_prim_dtype(self, dtype, end, start, step, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py index b033a2980de8af..db609894a10c3e 100644 --- a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py +++ b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py @@ -74,6 +74,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64'), diff --git a/tests/layer_tests/pytorch_tests/test_as_strided.py b/tests/layer_tests/pytorch_tests/test_as_strided.py index 964e9319ef5278..254084c89648dd 100644 --- a/tests/layer_tests/pytorch_tests/test_as_strided.py +++ b/tests/layer_tests/pytorch_tests/test_as_strided.py @@ -41,6 +41,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_as_strided(self, size, stride, offset, ie_device, precision, ir_version): self._test(*self.create_model(size, stride, offset), ie_device, precision, ir_version, trace_model=True) @@ -92,6 +93,7 @@ def forward_size_const(self, x, size_shape_tensor, stride_shape_tensor): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_as_strided_list_construct(self, size, stride, offset, mode, ie_device, precision, ir_version): inp_kwargs = {"size_shape_tensor": size, "stride_shape_tensor": stride} self._test( @@ -124,5 +126,6 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_as_strided_lf(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, freeze_model=False) diff --git a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py index 248eb44e35402e..72578bd75f2625 100644 --- a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py +++ b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py @@ -55,6 +55,7 @@ def forward_not_out(self, tensor_a, out): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("op_type", ["and", "or", "not", 
"xor"]) @pytest.mark.parametrize("lhs_dtype", ["bool", "int32", "uint8", "int64"]) @pytest.mark.parametrize("rhs_dtype", ["bool", "int32", "uint8", "int64"]) @@ -107,6 +108,7 @@ def forward(self, lhs, rhs): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("lhs_dtype", ["bool", "int32"]) @pytest.mark.parametrize("rhs_dtype", ["bool", "int32"]) @pytest.mark.parametrize( diff --git a/tests/layer_tests/pytorch_tests/test_clamp.py b/tests/layer_tests/pytorch_tests/test_clamp.py index 947f9197f72d35..b8a977a897046b 100644 --- a/tests/layer_tests/pytorch_tests/test_clamp.py +++ b/tests/layer_tests/pytorch_tests/test_clamp.py @@ -48,6 +48,7 @@ def forward_clip_(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_clamp(self, minimum, maximum, as_tensors, op_type, ie_device, precision, ir_version): self._test(*self.create_model(minimum, maximum, as_tensors, op_type), ie_device, precision, ir_version) @@ -76,6 +77,7 @@ def forward(self, x): @pytest.mark.parametrize("minimum", [0., 1., -1., 0.5, 2]) @pytest.mark.parametrize("as_tensor", [True, False]) @pytest.mark.nightly + @pytest.mark.precommit_fx_backend def test_clamp_min(self, minimum, as_tensor, ie_device, precision, ir_version): self._test(*self.create_model(minimum, as_tensor), ie_device, precision, ir_version, use_convert_model=True, trace_model=True) @@ -106,6 +108,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_clamp(self, maximum, as_tensor, ie_device, precision, ir_version): self._test(*self.create_model(maximum, as_tensor), ie_device, precision, ir_version, use_convert_model=True, trace_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_comparision.py b/tests/layer_tests/pytorch_tests/test_comparision.py index 8267a95d1c73a4..dee86407bb6051 100644 --- a/tests/layer_tests/pytorch_tests/test_comparision.py +++ b/tests/layer_tests/pytorch_tests/test_comparision.py @@ -55,6 +55,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_comp(self, op, ie_device, precision, ir_version): self._test(*self.create_model(op), ie_device, precision, ir_version, use_convert_model=True) @@ -127,6 +128,7 @@ def forward3(self, lhs, rhs): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_eq_mixed_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape, op): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_constant_pad_nd.py b/tests/layer_tests/pytorch_tests/test_constant_pad_nd.py new file mode 100644 index 00000000000000..7a92983bb1819d --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_constant_pad_nd.py @@ -0,0 +1,37 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestConstantPadND(PytorchLayerTest): + def _prepare_input(self): + import numpy as np + return (np.random.randn(2, 5, 3, 4).astype(np.float32),) + + def create_model(self, pad, value): + + import torch + class aten_constant_pad_nd(torch.nn.Module): + def __init__(self, pad=None, value=None): + super(aten_constant_pad_nd, 
self).__init__() + self.pad = pad + self.value = value + + def forward(self, x): + return torch.constant_pad_nd(x, self.pad, self.value) + + + ref_net = None + + return aten_constant_pad_nd(pad, value), ref_net, "aten::constant_pad_nd" + + @pytest.mark.parametrize(("pad", "value"), + [((1, 1, 1, 1), 0), ((0, 2, 0, 2), -1.0), ((3, 1, 5, 2), 0.5), ((0, 0, 0, 0), 0)]) + + @pytest.mark.precommit_fx_backend + def test_constant_pad_nd(self, pad, value, ie_device, precision, ir_version): + self._test(*self.create_model(pad, value), + ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_copy.py b/tests/layer_tests/pytorch_tests/test_copy.py index 1adff2f36d6536..c2a387a5358b00 100644 --- a/tests/layer_tests/pytorch_tests/test_copy.py +++ b/tests/layer_tests/pytorch_tests/test_copy.py @@ -28,6 +28,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("value", [1, [2.5], range(224)]) def test_copy_(self, value, ie_device, precision, ir_version): self._test(*self.create_model(value), ie_device, precision, ir_version) @@ -63,4 +64,4 @@ def forward_out(self, x, y): @pytest.mark.precommit @pytest.mark.parametrize("out", [True, False]) def test_copy_(self, out, ie_device, precision, ir_version): - self._test(*self.create_model(out), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out}) \ No newline at end of file + self._test(*self.create_model(out), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out}) diff --git a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py index 6e21d41a86df9f..45fac8ea0b8c43 100644 --- a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py +++ b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py @@ -170,6 +170,7 @@ def forward(self, x): @pytest.mark.parametrize("mask", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend def test_deformable_convolution2d(self, params, bias, mask, ie_device, precision, ir_version): self._test( *self.create_model(**params, bias=bias, mask=mask), ie_device, precision, ir_version, trace_model=True diff --git a/tests/layer_tests/pytorch_tests/test_div.py b/tests/layer_tests/pytorch_tests/test_div.py index 3ae112de0e2699..ad6769ded6504e 100644 --- a/tests/layer_tests/pytorch_tests/test_div.py +++ b/tests/layer_tests/pytorch_tests/test_div.py @@ -44,6 +44,7 @@ def forward(self, input_tensor, other_tensor): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_div_pt_spec(self, input_array, other_array, rounding_mode, ie_device, precision, ir_version): self.input_array = input_array self.input_type = np.float32 diff --git a/tests/layer_tests/pytorch_tests/test_embedding_bag.py b/tests/layer_tests/pytorch_tests/test_embedding_bag.py index 2c8d289f7e0035..e02eb8f7866a0a 100644 --- a/tests/layer_tests/pytorch_tests/test_embedding_bag.py +++ b/tests/layer_tests/pytorch_tests/test_embedding_bag.py @@ -42,6 +42,7 @@ def forward_offsets_per_sample_weights(self, indicies, weight, offsets, per_samp @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', @@ -86,6 +87,7 @@ def
forward_per_sample_weights(self, indicies, weight, per_sample_wights): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("indicies_size", [[1, 1], [2, 5], [3, 10], [4, 7]]) @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) diff --git a/tests/layer_tests/pytorch_tests/test_fake_quantize.py b/tests/layer_tests/pytorch_tests/test_fake_quantize.py index c3283279bb4aa3..0963b646d4d526 100644 --- a/tests/layer_tests/pytorch_tests/test_fake_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_fake_quantize.py @@ -37,6 +37,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize( "scale, zero_point, quant_min, quant_max", [ @@ -61,6 +62,58 @@ def test_fake_quantize_per_tensor_affine( freeze_model=False ) +class TestFakeQuantizePerTensorAffineCacheMaskTensorQParams(PytorchLayerTest): + def _prepare_input(self): + return (np.random.randn(3, 2, 2).astype(np.float32),) + + def create_model(self, scale, zero_point, quant_min, quant_max): + class _fake_quantize_per_tensor_affine_cachemask_tensor_qparams(torch.nn.Module): + def __init__(self, scale, zero_point, quant_min, quant_max): + super(_fake_quantize_per_tensor_affine_cachemask_tensor_qparams, self).__init__() + self.scale = torch.tensor(scale) + self.zero_point = torch.tensor(zero_point) + self.fake_quant_enabled = torch.tensor(1) + self.quant_min = quant_min + self.quant_max = quant_max + + def forward(self, x): + return torch._fake_quantize_per_tensor_affine_cachemask_tensor_qparams( + x, self.scale, self.zero_point, self.fake_quant_enabled, self.quant_min, self.quant_max + ) + + ref_net = None + + return ( + _fake_quantize_per_tensor_affine_cachemask_tensor_qparams(scale, zero_point, quant_min, quant_max), + ref_net, + "aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams", + ) + + @pytest.mark.precommit_fx_backend + @pytest.mark.parametrize( + "scale, zero_point, quant_min, quant_max", + [ + (1.0, 1, 0, 255), + (0.01, 0, 0, 255), + (-0.01, 0, 0, 255), + (0.5, 0, -128, 127), + (0.5, -1, -128, 127), + (1.0, 0, 0, 127), + ], + ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') + def test__fake_quantize_per_tensor_affine_cachemask_tensor_qparams( + self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max + ): + self._test( + *self.create_model(scale, zero_point, quant_min, quant_max), + ie_device, + precision, + ir_version, + freeze_model=False + ) + class TestFakeQuantizePerChannelAffine(PytorchLayerTest): def _prepare_input(self): @@ -91,6 +144,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize( "scale, zero_point, axis, quant_min, quant_max", [ diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index 4c54c14e06dd4a..ca949b14cb134a 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -84,6 +84,7 @@ def forward(self, x: float): @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_full(self, shape, value, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, @@ -94,6 +95,7 @@ def 
test_full(self, shape, value, ie_device, precision, ir_version): @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) @pytest.mark.parametrize("with_names", [True, False]) @pytest.mark.nightly + @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_full_dtype(self, shape, value, dtype, with_names, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, use_dtype=True, with_names=with_names), ie_device, precision, @@ -280,6 +282,7 @@ def forward(self, input_t: torch.Tensor, x: float): @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_full_like(self, shape, value, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, @@ -349,6 +352,7 @@ def forward(self, input_tensor: torch.Tensor, x: float): @pytest.mark.parametrize("value,input_dtype", [(0, np.uint8), (1, np.int32), (-1, np.float32), (0.5, np.float64)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, @@ -480,6 +484,7 @@ def forward(self, x): @pytest.mark.parametrize("op_type", ["aten::zeros", "aten::ones", "aten::zeros_like", "aten::ones_like"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_zeros_ones(self, op_type, shape, ie_device, precision, ir_version): self._test(*self.create_model(op_type), ie_device, precision, @@ -631,6 +636,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @@ -640,6 +646,7 @@ def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.parametrize("dtype", ["bool", "uint8", "int8", "int32", "int64", "float32", "float64"]) @pytest.mark.nightly @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_new_ones_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_glu.py b/tests/layer_tests/pytorch_tests/test_glu.py index 3dbb1f423ee4ab..8a1fbb4e23a153 100644 --- a/tests/layer_tests/pytorch_tests/test_glu.py +++ b/tests/layer_tests/pytorch_tests/test_glu.py @@ -30,6 +30,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dim", [0, 1, 2, 3, -1, -2]) def test_glu(self, dim, ie_device, precision, ir_version): - self._test(*self.create_model(dim), ie_device, precision, ir_version) \ No newline at end of file + self._test(*self.create_model(dim), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_grid_sampler.py b/tests/layer_tests/pytorch_tests/test_grid_sampler.py index 
d81ba01aca3bca..5da728f9d564c0 100644 --- a/tests/layer_tests/pytorch_tests/test_grid_sampler.py +++ b/tests/layer_tests/pytorch_tests/test_grid_sampler.py @@ -37,6 +37,7 @@ def forward(self, input, grid): @pytest.mark.parametrize("align_corners", [True, False, None]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_grid_sampler(self, h_in, w_in, h_out, w_out, mode, padding_mode, align_corners, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_hardtanh.py b/tests/layer_tests/pytorch_tests/test_hardtanh.py new file mode 100644 index 00000000000000..d0c4c1aac1a38d --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_hardtanh.py @@ -0,0 +1,42 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import platform + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestHardtanh(PytorchLayerTest): + def _prepare_input(self, input_dtype="float32", input_shape=(1, 3, 10, 10)): + return (np.random.default_rng().uniform(-100.0, 100.0, input_shape).astype(input_dtype),) + + def create_model(self, min_val, max_val, inplace): + import torch + import torch.nn.functional as F + + class aten_hardtanh(torch.nn.Module): + def __init__(self, min_val, max_val, inplace): + super(aten_hardtanh, self).__init__() + self.min_val = min_val + self.max_val = max_val + self.inplace = inplace + + def forward(self, x): + return F.hardtanh(x, min_val=self.min_val, max_val=self.max_val, inplace=self.inplace) + + ref_net = None + + return aten_hardtanh(min_val, max_val, inplace), ref_net, "aten::hardtanh" + + @pytest.mark.parametrize(("min_val", "max_val"), [[-1.0, 1.0], [0, 1.0], [-2.0, 2.0]]) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("input_dtype", ['float32', 'int32', 'int64', 'float64']) + @pytest.mark.parametrize("input_shape", [(1, 3, 10, 10), (100,), (24, 24)]) + @pytest.mark.precommit_fx_backend + def test_hardtanh(self, min_val, max_val, inplace, input_dtype, input_shape, ie_device, precision, ir_version): + self._test(*self.create_model(min_val, max_val, inplace), ie_device, precision, ir_version, + kwargs_to_prepare_input={"input_dtype": input_dtype, "input_shape": input_shape}) diff --git a/tests/layer_tests/pytorch_tests/test_index_select.py b/tests/layer_tests/pytorch_tests/test_index_select.py index e74bc597661ebd..2cf29c9172b9c9 100644 --- a/tests/layer_tests/pytorch_tests/test_index_select.py +++ b/tests/layer_tests/pytorch_tests/test_index_select.py @@ -41,6 +41,7 @@ def forward_out(self, x, indices, out): @pytest.mark.parametrize("out", [False, True]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend def test_index_select(self, dim, out, indices, ie_device, precision, ir_version): self._test(*self.create_model(dim, out), ie_device, precision, ir_version, kwargs_to_prepare_input={"index": indices, "out": out, "dim": dim}) diff --git a/tests/layer_tests/pytorch_tests/test_isfinite.py b/tests/layer_tests/pytorch_tests/test_isfinite.py new file mode 100644 index 00000000000000..d028b50c8bb2cf --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_isfinite.py @@ -0,0 +1,31 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class
import PytorchLayerTest + + +@pytest.mark.parametrize('input_tensor', [torch.tensor([1, float('inf'), 2, float('-inf'), float('nan')])]) +class TestIsFinite(PytorchLayerTest): + + def _prepare_input(self): + input_tensor = self.input_tensor + return (input_tensor,) + + def create_model(self): + class aten_isfinite(torch.nn.Module): + + def forward(self, input_tensor): + return torch.isfinite(input_tensor) + + ref_net = None + + return aten_isfinite(), ref_net, "aten::isfinite" + + @pytest.mark.precommit_fx_backend + def test_isfinite(self, ie_device, precision, ir_version, input_tensor): + self.input_tensor = input_tensor + self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_isinf.py b/tests/layer_tests/pytorch_tests/test_isinf.py new file mode 100644 index 00000000000000..03e5dda64dd253 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_isinf.py @@ -0,0 +1,31 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +@pytest.mark.parametrize('input_tensor', [torch.tensor([1, float('inf'), 2, float('-inf'), float('nan')])]) +class TestIsInf(PytorchLayerTest): + + def _prepare_input(self): + input_tensor = self.input_tensor + return (input_tensor,) + + def create_model(self): + class aten_isinf(torch.nn.Module): + + def forward(self, input_tensor): + return torch.isinf(input_tensor) + + ref_net = None + + return aten_isinf(), ref_net, "aten::isinf" + + @pytest.mark.precommit_fx_backend + def test_isinf(self, ie_device, precision, ir_version, input_tensor): + self.input_tensor = input_tensor + self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_isnan.py b/tests/layer_tests/pytorch_tests/test_isnan.py new file mode 100644 index 00000000000000..463d59392f09b1 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_isnan.py @@ -0,0 +1,31 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +@pytest.mark.parametrize('input_tensor', [torch.tensor([1, float('nan'), 2])]) +class TestIsNan(PytorchLayerTest): + + def _prepare_input(self): + input_tensor = self.input_tensor + return (input_tensor,) + + def create_model(self): + class aten_isnan(torch.nn.Module): + + def forward(self, input_tensor): + return torch.isnan(input_tensor) + + ref_net = None + + return aten_isnan(), ref_net, "aten::isnan" + + @pytest.mark.precommit_fx_backend + def test_isnan(self, ie_device, precision, ir_version, input_tensor): + self.input_tensor = input_tensor + self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_leaky_relu.py b/tests/layer_tests/pytorch_tests/test_leaky_relu.py index 002c76e5814001..f00e6d241220ab 100644 --- a/tests/layer_tests/pytorch_tests/test_leaky_relu.py +++ b/tests/layer_tests/pytorch_tests/test_leaky_relu.py @@ -32,5 +32,6 @@ def forward(self, x): @pytest.mark.parametrize("inplace", [skip_if_export(True), False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend def test_leaky_relu(self, alpha, inplace, ie_device, precision, ir_version): self._test(*self.create_model(alpha, inplace), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_logical_ops.py
b/tests/layer_tests/pytorch_tests/test_logical_ops.py index 210fd1a4bdb690..842d895542afb9 100644 --- a/tests/layer_tests/pytorch_tests/test_logical_ops.py +++ b/tests/layer_tests/pytorch_tests/test_logical_ops.py @@ -53,6 +53,7 @@ def forward_not_out(self, tensor_a, out): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("op_type", ["and", "or", "not", "xor"]) @pytest.mark.parametrize("first_dtype", ["bool", "int32", 'int8', 'float32']) @pytest.mark.parametrize("second_dtype", ["bool", "int32", 'int8', 'float32']) @@ -61,4 +62,4 @@ def test_logical(self, op_type, out, first_dtype, second_dtype, ie_device, preci self._test(*self.create_model(op_type, out), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "unary": op_type == "not", - "first_dtype": first_dtype, "second_dtype": second_dtype}) \ No newline at end of file + "first_dtype": first_dtype, "second_dtype": second_dtype}) diff --git a/tests/layer_tests/pytorch_tests/test_masked_fill.py b/tests/layer_tests/pytorch_tests/test_masked_fill.py index 8fc59c9149c04e..4959411c26a04b 100644 --- a/tests/layer_tests/pytorch_tests/test_masked_fill.py +++ b/tests/layer_tests/pytorch_tests/test_masked_fill.py @@ -54,6 +54,7 @@ def forward(self, x, mask): @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend def test_masked_fill(self, value, mask_fill, mask_dtype, input_dtype, inplace, ie_device, precision, ir_version): self._test(*self.create_model(value, inplace), ie_device, precision, ir_version, diff --git a/tests/layer_tests/pytorch_tests/test_mean.py b/tests/layer_tests/pytorch_tests/test_mean.py index 1ba45e1a3b3791..46ce6d33918baa 100644 --- a/tests/layer_tests/pytorch_tests/test_mean.py +++ b/tests/layer_tests/pytorch_tests/test_mean.py @@ -80,6 +80,7 @@ def forward_out(self, x, out): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_sum(self, axes, keep_dim, dtype, out, ie_device, precision, ir_version): if PytorchLayerTest.use_torch_export() and out: pytest.skip(reason="export fails for out") diff --git a/tests/layer_tests/pytorch_tests/test_min_max.py b/tests/layer_tests/pytorch_tests/test_min_max.py index beba07b1d02540..d857c222158ac5 100644 --- a/tests/layer_tests/pytorch_tests/test_min_max.py +++ b/tests/layer_tests/pytorch_tests/test_min_max.py @@ -76,6 +76,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_reduce_min_max(self, axes, keep_dims, op_type, ie_device, precision, ir_version): self._test(*self.create_model(op_type, axes, keep_dims, single_input=True), ie_device, precision, ir_version) @@ -86,6 +87,7 @@ def test_reduce_min_max(self, axes, keep_dims, op_type, ie_device, precision, ir @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_min_max(self, op_type, first_input_dtype, second_input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(op_type, None, None, single_input=False, dtypes=(first_input_dtype, second_input_dtype)), ie_device, precision, ir_version, kwargs_to_prepare_input= @@ -266,6 +268,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_minimum_maximum( self, op_type, first_input_dtype, 
second_input_dtype, ie_device, precision, ir_version ): @@ -342,4 +345,4 @@ def test_amin_amax(self, op_type, input_dtype, axis, keep_dims, out, ie_device, self._test(*self.create_model(op_type, axis, keep_dims, out), ie_device, precision, ir_version, kwargs_to_prepare_input= {"input_dtype": input_dtype, "out": out, "axes": axis, "keep_dims": keep_dims} - ) \ No newline at end of file + ) diff --git a/tests/layer_tests/pytorch_tests/test_pooling.py b/tests/layer_tests/pytorch_tests/test_pooling.py index f8c190917c2c92..24fc01fdcffaed 100644 --- a/tests/layer_tests/pytorch_tests/test_pooling.py +++ b/tests/layer_tests/pytorch_tests/test_pooling.py @@ -157,6 +157,7 @@ def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): @@ -169,6 +170,7 @@ def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): @@ -232,6 +234,7 @@ def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precis @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): @@ -248,6 +251,7 @@ def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_max_pool3d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_pow.py b/tests/layer_tests/pytorch_tests/test_pow.py index fb59a8ab4e1cc5..1321f4d6dd79aa 100644 --- a/tests/layer_tests/pytorch_tests/test_pow.py +++ b/tests/layer_tests/pytorch_tests/test_pow.py @@ -47,6 +47,7 @@ def forward_inplace(self, input_data, exponent): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_pow(self, inplace, ie_device, precision, ir_version, test_input): if inplace and PytorchLayerTest.use_torch_export(): pytest.skip(reason="export fails for inplace") @@ -109,6 +110,7 @@ def forward3(self, lhs, rhs): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_pow_mixed_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_repeat.py b/tests/layer_tests/pytorch_tests/test_repeat.py index beab3ab8b10341..9ac59e2f02e5e3 100644 --- a/tests/layer_tests/pytorch_tests/test_repeat.py +++ 
b/tests/layer_tests/pytorch_tests/test_repeat.py @@ -30,6 +30,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_repeat(self, repeats, ie_device, precision, ir_version): self._test(*self.create_model(repeats), ie_device, precision, ir_version) @@ -56,6 +57,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_repeat(self, repeats, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, kwargs_to_prepare_input={"repeats_shape": repeats}) @@ -79,5 +81,6 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_repeat_t5(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_roi_align.py b/tests/layer_tests/pytorch_tests/test_roi_align.py index 0079d9c0ca77e6..896bd1a13c0966 100644 --- a/tests/layer_tests/pytorch_tests/test_roi_align.py +++ b/tests/layer_tests/pytorch_tests/test_roi_align.py @@ -52,6 +52,7 @@ def forward(self, input_tensor, rois): @pytest.mark.parametrize('aligned', (True, False)) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend def test_roi_align(self, ie_device, precision, ir_version, input_shape, boxes, output_size, spatial_scale, sampling_ratio, aligned): self.input_tensor = np.random.randn(*input_shape).astype(np.float32) diff --git a/tests/layer_tests/pytorch_tests/test_roll.py b/tests/layer_tests/pytorch_tests/test_roll.py index 5a5f63c772d5c5..eabae207e2b3d6 100644 --- a/tests/layer_tests/pytorch_tests/test_roll.py +++ b/tests/layer_tests/pytorch_tests/test_roll.py @@ -18,12 +18,12 @@ class aten_roll(torch.nn.Module): def __init__(self, shifts, dim=None): super(aten_roll, self).__init__() self.dim = dim - self.shits = shifts + self.shifts = shifts def forward(self, x): if self.dim is not None: - return torch.roll(x, self.shits, self.dim) - return torch.roll(x, self.shits) + return torch.roll(x, self.shifts, self.dim) + return torch.roll(x, self.shifts) ref_net = None @@ -38,5 +38,6 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_roll(self, shifts, dim, ie_device, precision, ir_version): self._test(*self.create_model(shifts, dim), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_rsub.py b/tests/layer_tests/pytorch_tests/test_rsub.py index 70380a9e5f807d..0d918120f29c6a 100644 --- a/tests/layer_tests/pytorch_tests/test_rsub.py +++ b/tests/layer_tests/pytorch_tests/test_rsub.py @@ -104,9 +104,10 @@ def forward2(self, lhs, rhs:int): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_rsub_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type): self.lhs_type = lhs_type self.lhs_shape = lhs_shape self.rhs_type = rhs_type self._test(*self.create_model(lhs_type, rhs_type), - ie_device, precision, ir_version) \ No newline at end of file + ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_scatter.py b/tests/layer_tests/pytorch_tests/test_scatter.py index 6f8e0cdd1623d7..34c4ad84adf142 100644 --- 
a/tests/layer_tests/pytorch_tests/test_scatter.py
+++ b/tests/layer_tests/pytorch_tests/test_scatter.py
@@ -91,6 +91,7 @@ def _forward_inplace_reduce(self, x: torch.Tensor):
 
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     @pytest.mark.parametrize("dim", [1, -1, 0])
     @pytest.mark.parametrize(
         "index",
diff --git a/tests/layer_tests/pytorch_tests/test_select_scatter.py b/tests/layer_tests/pytorch_tests/test_select_scatter.py
new file mode 100644
index 00000000000000..112675264c74a5
--- /dev/null
+++ b/tests/layer_tests/pytorch_tests/test_select_scatter.py
@@ -0,0 +1,35 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+from pytorch_layer_test_class import PytorchLayerTest
+import torch
+
+
+class TestSelectScatter(PytorchLayerTest):
+    def _prepare_input(self):
+        import numpy as np
+        return (np.random.randn(2, 5, 3, 4).astype(np.float32),)
+
+    def create_model(self, src, dim, index):
+
+        class aten_select_scatter(torch.nn.Module):
+            def __init__(self, src=None, dim=None, index=None):
+                super(aten_select_scatter, self).__init__()
+                self.src = src
+                self.dim = dim
+                self.index = index
+
+            def forward(self, x):
+                return torch.select_scatter(x, self.src, self.dim, self.index)
+
+        ref_net = None
+
+        return aten_select_scatter(src, dim, index), ref_net, "aten::select_scatter"
+
+    @pytest.mark.precommit_fx_backend
+    @pytest.mark.parametrize(("src", "dim", "index"),
+                             [(torch.ones(2), 0, 0),])
+    def test_select_scatter(self, src, dim, index, ie_device, precision, ir_version):
+        self._test(*self.create_model(src, dim, index),
+                   ie_device, precision, ir_version)
diff --git a/tests/layer_tests/pytorch_tests/test_sign.py b/tests/layer_tests/pytorch_tests/test_sign.py
index dac0b32f70d05d..9cad2fbd6ea745 100644
--- a/tests/layer_tests/pytorch_tests/test_sign.py
+++ b/tests/layer_tests/pytorch_tests/test_sign.py
@@ -45,6 +45,7 @@ def forward_out(self, x, out):
 
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     @pytest.mark.parametrize("input_type", ["zeros", "positive", "negative", "mixed"])
     @pytest.mark.parametrize("out", [True, False])
     def test_sign(self, input_type, out, ie_device, precision, ir_version):
diff --git a/tests/layer_tests/pytorch_tests/test_slice_scatter.py b/tests/layer_tests/pytorch_tests/test_slice_scatter.py
new file mode 100644
index 00000000000000..0d291f6bb4d3aa
--- /dev/null
+++ b/tests/layer_tests/pytorch_tests/test_slice_scatter.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+import torch
+
+from pytorch_layer_test_class import PytorchLayerTest
+
+
+class TestSliceScatter(PytorchLayerTest):
+    def _prepare_input(self):
+        import numpy as np
+        return (np.random.randn(2, 5, 3, 4).astype(np.float32),)
+
+    def create_model(self, src, dim, start, end, step):
+
+        class aten_slice_scatter(torch.nn.Module):
+            def __init__(self, src=None, dim=None, start=None, end=None, step=None):
+                super(aten_slice_scatter, self).__init__()
+                self.src = src
+                self.dim = dim
+                self.start = start
+                self.end = end
+                self.step = step
+
+            def forward(self, x):
+                return torch.slice_scatter(x, src=self.src, dim=self.dim, start=self.start, end=self.end, step=self.step)
+
+        ref_net = None
+
+        return aten_slice_scatter(src, dim, start, end, step), ref_net, "aten::slice_scatter"
+
+    @pytest.mark.precommit_fx_backend
+    @pytest.mark.parametrize(("src", "dim", "start", "end", "step"),
+                             [(torch.ones(2), 1, 1, 2, 1),])
+    def test_slice_scatter(self, src, dim, start, end, step, ie_device, precision, ir_version):
+        self._test(*self.create_model(src, dim, start, end, step),
+                   ie_device, precision, ir_version)
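Note (illustration for reviewers, not part of the patch): the two new test files above exercise torch.select_scatter and torch.slice_scatter, which return a copy of the input tensor with a smaller tensor written into one index, or one slice, along a dimension. A minimal sketch of the semantics the tests rely on, assuming a PyTorch build that provides both ops:

    import torch

    base = torch.zeros(2, 3)

    # select_scatter(input, src, dim, index): copy of input with the slice
    # input[index] along dim replaced by src (here: row 0 becomes ones).
    out_select = torch.select_scatter(base, torch.ones(3), dim=0, index=0)
    assert torch.equal(out_select[0], torch.ones(3))

    # slice_scatter(input, src, dim, start, end, step): copy of input with
    # input[start:end:step] along dim replaced by src (here: column 1).
    out_slice = torch.slice_scatter(base, torch.ones(2, 1), dim=1, start=1, end=2, step=1)
    assert torch.equal(out_slice[:, 1], torch.ones(2))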
diff --git a/tests/layer_tests/pytorch_tests/test_sort.py b/tests/layer_tests/pytorch_tests/test_sort.py
index c92508b9e47d16..53f21833a21c50 100644
--- a/tests/layer_tests/pytorch_tests/test_sort.py
+++ b/tests/layer_tests/pytorch_tests/test_sort.py
@@ -78,6 +78,7 @@ def forward(self, input_tensor):
     ])
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     def test_sort(self, input_shape, descending, stable, ie_device, precision, ir_version):
         self.input_tensor = []
         if type(input_shape) is list:
diff --git a/tests/layer_tests/pytorch_tests/test_topk.py b/tests/layer_tests/pytorch_tests/test_topk.py
index 512d9ed41f606e..cee8c103ab791d 100644
--- a/tests/layer_tests/pytorch_tests/test_topk.py
+++ b/tests/layer_tests/pytorch_tests/test_topk.py
@@ -61,6 +61,7 @@ def forward(self, input_tensor):
     ])
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 115085")
     def test_topK(self, input_shape, k, dim, largest, sort, ie_device, precision, ir_version):
         self.input_tensor = np.random.randn(*input_shape).astype(np.float32)
diff --git a/tests/layer_tests/pytorch_tests/test_trilu.py b/tests/layer_tests/pytorch_tests/test_trilu.py
index 87afd796da5268..0bdb1dfd983778 100644
--- a/tests/layer_tests/pytorch_tests/test_trilu.py
+++ b/tests/layer_tests/pytorch_tests/test_trilu.py
@@ -41,6 +41,7 @@ def forward(self, x):
     @pytest.mark.parametrize("op", ["triu", "tril"])
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     def test_trilu(self, input_shape, dtype, diagonal, op, ie_device, precision, ir_version):
         self._test(*self.create_model(op, diagonal), ie_device, precision, ir_version,
                    kwargs_to_prepare_input={"shape": input_shape, "dtype": dtype})
@@ -89,6 +90,7 @@ def triu_(self, x):
     @pytest.mark.parametrize("op", ["triu", "tril", "triu_", "tril_"])
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     def test_trilu(self, input_shape, dtype, diagonal, op, ie_device, precision, ir_version):
         self._test(*self.create_model(op, diagonal), ie_device, precision, ir_version,
-                   kwargs_to_prepare_input={"shape": input_shape, "dtype": dtype})
\ No newline at end of file
+                   kwargs_to_prepare_input={"shape": input_shape, "dtype": dtype})
diff --git a/tests/layer_tests/pytorch_tests/test_unary_ops.py b/tests/layer_tests/pytorch_tests/test_unary_ops.py
index 25a6aeccf93d99..d54d1102737134 100644
--- a/tests/layer_tests/pytorch_tests/test_unary_ops.py
+++ b/tests/layer_tests/pytorch_tests/test_unary_ops.py
@@ -66,7 +66,8 @@
     "aten::asinh": torch.asinh,
     "aten::asinh_": torch.asinh_,
     "aten::atanh": torch.atanh,
-    "aten::atanh_": torch.atanh_
+    "aten::atanh_": torch.atanh_,
+    "aten::hardswish": F.hardswish
 }
 
@@ -117,6 +118,7 @@ def _prepare_input(self):
 
     @pytest.mark.nightly
     @pytest.mark.precommit
     @pytest.mark.precommit_torch_export
+    @pytest.mark.precommit_fx_backend
     @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.int8, torch.uint8, torch.int32, torch.int64])
     @pytest.mark.parametrize("op_type", [
@@ -160,6 +162,7 @@ def test_unary_op(self, op_type, dtype, ie_device, precision, ir_version):
 
     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     @pytest.mark.parametrize("dtype", [torch.float32,
torch.float64]) @pytest.mark.parametrize("op_type", [ @@ -192,7 +195,8 @@ def test_unary_op(self, op_type, dtype, ie_device, precision, ir_version): "aten::atan_", "aten::acosh_", "aten::asinh_", - "aten::atanh_" + "aten::atanh_", + "aten::hardswish" ]) def test_unary_op_float(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = dtype @@ -241,12 +245,14 @@ def test_unary_op_out(self, op_type, dtype, ie_device, precision, ir_version): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) @pytest.mark.parametrize("op_type", [ "aten::relu6", "aten::selu", "aten::silu", + "aten::hardswish", "aten::mish", ]) def test_unary_func_op_inplace(self, op_type, dtype, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_unfold.py b/tests/layer_tests/pytorch_tests/test_unfold.py index 4d5b9ee57ffe21..671af872d96973 100644 --- a/tests/layer_tests/pytorch_tests/test_unfold.py +++ b/tests/layer_tests/pytorch_tests/test_unfold.py @@ -39,6 +39,7 @@ def forward(self, input_tensor): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export + @pytest.mark.precommit_fx_backend def test_unfold(self, ie_device, precision, ir_version, dimension, size, step, input_shape): self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(dimension, size, step), diff --git a/tests/layer_tests/pytorch_tests/test_var_mean.py b/tests/layer_tests/pytorch_tests/test_var_mean.py index 4863d4d29677b7..8318b6330e0bc0 100644 --- a/tests/layer_tests/pytorch_tests/test_var_mean.py +++ b/tests/layer_tests/pytorch_tests/test_var_mean.py @@ -52,6 +52,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("unbiased", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', @@ -61,6 +62,7 @@ def test_op2args(self, unbiased, op_type, ie_device, precision, ir_version): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("unbiased", [False, True]) @pytest.mark.parametrize("dim", [None, 0, 1, 2, 3, -1, -2, (0, 1), (-1, -2), (0, 1, -1), (0, 1, 2, 3)]) @pytest.mark.parametrize("keepdim", [True, False]) @@ -68,4 +70,4 @@ def test_op2args(self, unbiased, op_type, ie_device, precision, ir_version): @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_op(self, unbiased, dim, keepdim, op_type, ie_device, precision, ir_version): - self._test(*self.create_model(unbiased, dim, keepdim, two_args_case=False, op_type=op_type), ie_device, precision, ir_version) \ No newline at end of file + self._test(*self.create_model(unbiased, dim, keepdim, two_args_case=False, op_type=op_type), ie_device, precision, ir_version) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Equal.py b/tests/layer_tests/tensorflow_tests/test_tf_Equal.py index b785e4ce440d95..502d0cfc33ba06 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Equal.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Equal.py @@ -241,8 +241,8 @@ def create_equal_net(self, x_shape, y_shape): return tf_net, ref_net - @pytest.mark.parametrize('x_shape', [[1], [5]]) - @pytest.mark.parametrize('y_shape', [[1], [5]]) + @pytest.mark.parametrize('x_shape', [[], [1], [5]]) + 
@pytest.mark.parametrize('y_shape', [[], [1], [5]]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', diff --git a/tests/layer_tests/tensorflow_tests/test_tf_OnesLike.py b/tests/layer_tests/tensorflow_tests/test_tf_OnesLike.py index 1a5cb7110e8288..0da2822155c8d9 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_OnesLike.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_OnesLike.py @@ -43,3 +43,48 @@ def test_ones_like(self, params, ie_device, precision, ir_version, temp_dir, self._test(*self.create_ones_like_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) + + +class TestComplexOnesLike(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'x_real:0' in inputs_info + assert 'x_imag:0' in inputs_info + x_real_shape = inputs_info['x_real:0'] + x_imag_shape = inputs_info['x_imag:0'] + inputs_data = {} + inputs_data['x_real:0'] = 4 * rng.random(x_real_shape).astype(self.x_type) - 2 + inputs_data['x_imag:0'] = 4 * rng.random(x_imag_shape).astype(self.x_type) - 2 + return inputs_data + + def create_complex_ones_like_net(self, x_shape, x_type): + self.x_type = x_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x_real = tf.compat.v1.placeholder(tf.dtypes.as_dtype(x_type), x_shape, 'x_real') + x_imag = tf.compat.v1.placeholder(tf.dtypes.as_dtype(x_type), x_shape, 'x_imag') + x_complex = tf.raw_ops.Complex(real=x_real, imag=x_imag) + ones_like = tf.raw_ops.OnesLike(x=x_complex) + real = tf.raw_ops.Real(input=ones_like) + img = tf.raw_ops.Imag(input=ones_like) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(x_shape=[], x_type=np.float32), + dict(x_shape=[2], x_type=np.float32), + dict(x_shape=[2, 3, 4], x_type=np.float32), + dict(x_shape=[1, 4, 3, 1], x_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_complex_ones_like(self, params, ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): + self._test(*self.create_complex_ones_like_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) diff --git a/tests/model_hub_tests/tensorflow/test_tf_hub_api_notebooks.py b/tests/model_hub_tests/tensorflow/test_tf_hub_api_notebooks.py index b04f6c15d5c5d4..5c34d8125b9c6d 100644 --- a/tests/model_hub_tests/tensorflow/test_tf_hub_api_notebooks.py +++ b/tests/model_hub_tests/tensorflow/test_tf_hub_api_notebooks.py @@ -5,6 +5,7 @@ import tensorflow as tf import tensorflow_hub as hub from models_hub_common.test_convert_model import TestConvertModel + from utils import get_input_info diff --git a/tools/constraints.txt b/tools/constraints.txt index e91d858e4eabcc..bba8b804da0f2b 100644 --- a/tools/constraints.txt +++ b/tools/constraints.txt @@ -4,6 +4,7 @@ # tensorflow, numpy mxnet~=1.2.0; sys_platform == 'win32' mxnet>=1.7.0.post2,<=1.9.1; sys_platform != 'win32' +mpmath<1.4; extra == 'pytorch' onnx>=1.8.1,<=1.15.0 networkx<=3.1.0 pytest>=5.0,<7.3 diff --git a/tools/mo/requirements_tf.txt b/tools/mo/requirements_tf.txt index e9ff04cc7e6772..e32f7e076bc1d1 100644 --- a/tools/mo/requirements_tf.txt +++ b/tools/mo/requirements_tf.txt @@ -1,5 +1,5 @@ -c ../constraints.txt 
-tensorflow>=1.15.5,<2.16.0 +tensorflow>=1.15.5,<2.17.0 numpy>=1.16.6,<1.26 networkx defusedxml diff --git a/tools/mo/requirements_tf2.txt b/tools/mo/requirements_tf2.txt index d5825581a5549c..3aa9a1f82e510b 100644 --- a/tools/mo/requirements_tf2.txt +++ b/tools/mo/requirements_tf2.txt @@ -1,5 +1,5 @@ -c ../constraints.txt -tensorflow>=2.5,<2.16.0 +tensorflow>=2.5,<2.17.0 numpy>=1.16.6,<1.26 networkx defusedxml diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py index 648e72ffe5a548..748cbe2b0453d9 100644 --- a/tools/ovc/openvino/tools/ovc/convert_impl.py +++ b/tools/ovc/openvino/tools/ovc/convert_impl.py @@ -38,6 +38,7 @@ # pylint: disable=no-name-in-module,import-error from openvino.frontend import FrontEndManager, OpConversionFailure, TelemetryExtension +from openvino.frontend.pytorch.module_extension import ModuleExtension from openvino.runtime import get_version as get_rt_version from openvino.runtime import Type, PartialShape @@ -173,7 +174,8 @@ def prepare_ir(argv: argparse.Namespace): moc_front_end.add_extension(TelemetryExtension("ovc", t.send_event, t.send_error, t.send_stack_trace)) if any_extensions_used(argv): for extension in argv.extension: - moc_front_end.add_extension(extension) + if not isinstance(extension, ModuleExtension): + moc_front_end.add_extension(extension) ov_model = moc_pipeline(argv, moc_front_end) return ov_model diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index 0e057c50bec95d..8e562cf21b8606 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -8,6 +8,14 @@ # pylint: disable=no-name-in-module,import-error from openvino.runtime import Tensor, PartialShape from openvino.tools.ovc.error import Error +from openvino.frontend.pytorch.module_extension import ModuleExtension + + +def extract_module_extensions(args): + extensions = args.get('extension', []) + if not isinstance(extensions, (list, tuple)): + extensions = [extensions] + return {extension.module: extension for extension in extensions if isinstance(extension, ModuleExtension)} def get_pytorch_decoder(model, example_inputs, args): @@ -37,7 +45,11 @@ def get_pytorch_decoder(model, example_inputs, args): if hasattr(torch, "export") and isinstance(model, (torch.export.ExportedProgram)): raise RuntimeError("Models received from torch.export are not yet supported by convert_model.") else: - decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) + decoder = TorchScriptPythonDecoder( + model, + example_input=inputs, + shared_memory=args.get("share_weights", True), + module_extensions=extract_module_extensions(args)) else: decoder = model args['input_model'] = decoder
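Note (illustrative sketch for reviewers, not part of the patch): with the two changes above, extensions passed to ovc are now split by type. ModuleExtension instances are collected by extract_module_extensions and handed to TorchScriptPythonDecoder, while all other extensions are still registered on the MOC frontend, which now skips ModuleExtension. A hedged usage sketch; the ModuleExtension constructor arguments shown here are an assumption, not confirmed by this patch:

    import torch
    from openvino.frontend.pytorch.module_extension import ModuleExtension
    from openvino.tools.ovc import convert_model

    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())

    # Assumed constructor: an extension targeting torch.nn.ReLU submodules.
    # Per this patch, extract_module_extensions({'extension': [ext]}) returns
    # {torch.nn.ReLU: ext} for the decoder, and prepare_ir never calls
    # moc_front_end.add_extension(ext) because of the new isinstance filter.
    ext = ModuleExtension(torch.nn.ReLU, target_op="aten::relu")
    ov_model = convert_model(model, example_input=torch.randn(1, 8), extension=[ext])

Any non-ModuleExtension entry in the same list (for example a path to a shared-library extension) still takes the existing moc_front_end.add_extension path unchanged.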