diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 352049ffbd9211..b1882c395c2873 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -97,7 +97,7 @@ # QA Tests: /tests/ @openvinotoolkit/openvino-tests-maintainers -/tests/layer_tests/ @openvinotoolkit/openvino-tests-maintainers @openvinotoolkit/openvino-mo-maintainers +/tests/layer_tests/ @openvinotoolkit/openvino-tests-maintainers @openvinotoolkit/openvino-ovc-maintainers /tests/layer_tests/pytorch_tests/ @openvinotoolkit/openvino-pytorch-frontend-maintainers /tests/layer_tests/tensorflow_tests @openvinotoolkit/openvino-tf-frontend-maintainers /tests/layer_tests/jax_tests @openvinotoolkit/openvino-tf-frontend-maintainers @openvinotoolkit/openvino-jax-frontend-maintainers @@ -110,7 +110,7 @@ /tools/legacy/ @openvinotoolkit/openvino-samples-maintainers /tools/openvino_dev/ @openvinotoolkit/openvino-tools-maintainers @openvinotoolkit/openvino-ie-python-api-maintainers /tools/mo/ @openvinotoolkit/openvino-mo-maintainers -/tools/ovc/ @openvinotoolkit/openvino-mo-maintainers +/tools/ovc/ @openvinotoolkit/openvino-ovc-maintainers /thirdparty/open_model_zoo/ @openvinotoolkit/omz-maintainers # Documentation @@ -118,7 +118,7 @@ /docs/CMakeLists.txt @openvinotoolkit/openvino-ie-maintainers /**/*.md @openvinotoolkit/openvino-docs-maintainers /**/*.svg @openvinotoolkit/openvino-docs-maintainers -/docs/MO_DG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-mo-maintainers +/docs/MO_DG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ovc-maintainers /docs/OV_Runtime_UG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ie-maintainers /docs/IE_PLUGIN_DG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ie-maintainers /docs/Extensibility_UG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ie-maintainers diff --git a/.github/actions/create_manifest/action.yml b/.github/actions/create_manifest/action.yml new file mode 
100644 index 00000000000000..66d59930e93712 --- /dev/null +++ b/.github/actions/create_manifest/action.yml @@ -0,0 +1,44 @@ +name: 'Create manifest' +description: 'Creates manifest containing versions of the product and the corresponding repositories' +inputs: + repos: + description: "Multi-line list of repositories to include to manifest" + required: true + product_type: + description: "Unique string to reflect product configuration" + required: true + save_to: + description: "Path to save manifest to" + required: true + action_path: + description: "Action path, if not set - taken from github context" + required: false + target_arch: + description: "Target architecture" + required: true + build_type: + description: "Build type: release | debug | release_with_debug" + required: true + + +runs: + using: "composite" + steps: + - name: Install Python dependencies + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + pip install -r ${{ env.ACTION_PATH }}/requirements.txt + env: + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} + + - name: 'Create manifest' + id: create_manifest + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + python ${{ env.ACTION_PATH }}/create_manifest.py + --target_arch "${{ inputs.target_arch }}" --build_type "${{ inputs.build_type }}" + --save_to "${{ inputs.save_to }}" --product_type "${{ inputs.product_type }}" -r "${{ inputs.repos }}" + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} diff --git a/.github/actions/create_manifest/create_manifest.py b/.github/actions/create_manifest/create_manifest.py new file mode 100644 index 00000000000000..1fb3a4712807e6 --- /dev/null +++ b/.github/actions/create_manifest/create_manifest.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import 
argparse +import logging +import os +from datetime import timezone +from pathlib import Path +import re +import git + +from manifest_manager import Manifest, Repository, Component + + +def parse_args(): + parser = argparse.ArgumentParser(description='Creates manifest with product and repositories version') + parser.add_argument('-e', '--event_name', help='Name of GitHub event', required=False) + parser.add_argument('-r', '--repos', type=str, help='Paths to repositories to lon in manifest', + required=True) + parser.add_argument('--product_type', help='Unique string to reflect product configuration', required=True) + parser.add_argument('--target_arch', help='Target architecture', required=True) + parser.add_argument('--build_type', help='Build type: release | debug | release_with_debug', required=True) + parser.add_argument('--save_to', help='Path to save manifest to', required=True) + args = parser.parse_args() + return args + + +def init_logger(): + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-15s %(levelname)-8s %(message)s', + datefmt='%m-%d-%Y %H:%M:%S') + + +def set_github_output(name: str, value: str, github_output_var_name: str = 'GITHUB_OUTPUT'): + """Sets output variable for a GitHub Action""" + logger = logging.getLogger(__name__) + # In an environment variable "GITHUB_OUTPUT" GHA stores path to a file to write outputs to + with open(os.environ.get(github_output_var_name), 'a+') as file: + logger.info(f"Add {name}={value} to {github_output_var_name}") + print(f'{name}={value}', file=file) + + +def get_repo_data(repo_dir: str | Path) -> dict: + repo = git.Repo(str(repo_dir)) + repo_url = next(repo.remote().urls) + repo_name_match = re.search(r'github\.com/[^/]+/([^/]+)', repo_url) + repo_name = repo_name_match.group(1) if repo_name_match else None + + trigger_repo_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}" + is_trigger_repo = repo_url == trigger_repo_url + + branch = os.getenv('GITHUB_REF') if 
is_trigger_repo else repo.references[0].name + target_branch = os.getenv('GITHUB_BASE_REF') if is_trigger_repo else None + revision = os.getenv('PR_HEAD_SHA') or os.getenv('GITHUB_SHA') if is_trigger_repo else repo.head.commit.hexsha + target_revision = os.getenv('BASE_SHA') if is_trigger_repo else None + # Commit time of a merge commit (in case of PR merged to target) + # TODO: Save commit time of a head commit in PR as well? + commit_time = repo.head.commit.committed_datetime.astimezone(timezone.utc) + merge_target = branch.endswith('/merge') + return { + 'name': repo_name, + 'url': repo_url, + 'branch': branch.replace('refs/heads/', ''), # To align with internal manifest + 'target_branch': target_branch, + 'revision': revision, + 'target_revision': target_revision, + 'commit_time': commit_time, + 'merge_target': merge_target, + 'trigger': is_trigger_repo, + } + + +def parse_ov_version(header_file: str | Path) -> str: + header_code = Path(header_file).read_text() + major, minor, patch = (re.search(rf"#define OPENVINO_VERSION_{name} (\d+)", header_code).group(1) + for name in ["MAJOR", "MINOR", "PATCH"]) + return f"{major}.{minor}.{patch}" + + +def generate_manifest(repos: list, product_type: str, event_type: str, build_type: str, target_arch: str) -> Manifest: + manifest = Manifest() + component_name = 'dldt' # historical, keep for internal compatibility + repositories = [] + ov_version = None + trigger_repo = None + + for repo_dir in repos: + repo = Repository(**get_repo_data(repo_dir)) + repositories.append(repo) + if repo.name == 'openvino': + version_file = Path(repo_dir) / 'src' / 'core' / 'include' / 'openvino' / 'core' / 'version.hpp' + ov_version = parse_ov_version(version_file) + if repo.trigger: + trigger_repo = repo + + custom_branch_name = f'-{trigger_repo.branch}' if trigger_repo.branch != 'master' else '' + run_number_postfix = f'-{os.environ.get("GITHUB_RUN_NUMBER")}' if os.environ.get("GITHUB_RUN_NUMBER") else '' + product_version = 
f"{ov_version}{run_number_postfix}-{trigger_repo.revision[:11]}{custom_branch_name}" + ci_build_dev_tag = f'dev{trigger_repo.commit_time.strftime("%Y%m%d")}' + wheel_product_version = f'{ov_version}.{ci_build_dev_tag}' + + set_github_output('CI_BUILD_NUMBER', product_version, 'GITHUB_ENV') + set_github_output('CI_BUILD_DEV_TAG', ci_build_dev_tag, 'GITHUB_ENV') + + component = Component(name=component_name, version=product_version, product_type=product_type, + target_arch=target_arch, build_type=build_type, build_event=event_type, + repositories=repositories, custom_params={'wheel_product_version': wheel_product_version}) + + manifest.add_component(component) + return manifest + + +def main(): + init_logger() + logger = logging.getLogger(__name__) + args = parse_args() + + event_name = args.event_name or os.getenv('GITHUB_EVENT_NAME') + event_type = 'pre_commit' if event_name == 'pull_request' else 'commit' + + repos = args.repos.split() + manifest = generate_manifest(repos, args.product_type, event_type, args.build_type, args.target_arch) + + logger.info(f"Saving manifest to {args.save_to}") + manifest.save_manifest(args.save_to) + + +if __name__ == '__main__': + main() diff --git a/.github/actions/create_manifest/manifest_manager.py b/.github/actions/create_manifest/manifest_manager.py new file mode 100644 index 00000000000000..9978f65c0d560e --- /dev/null +++ b/.github/actions/create_manifest/manifest_manager.py @@ -0,0 +1,336 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import yaml +from pathlib import Path +from copy import deepcopy +from typing import Optional, Dict, List, Union, Iterator, Any + + +class ManifestException(Exception): + """Base Manifest file manager exception""" + + +class ManifestDoesNotExist(ManifestException): + """ManifestDoesNotExist Manifest file manager exception""" + + +class ManifestSavingError(ManifestException): + """ManifestSavingError Manifest file 
manager exception""" + + +class WrongComponentFormatError(ManifestException): + """WrongComponentFormatError Manifest file manager exception""" + + +class WrongRepositoryFormatError(ManifestException): + """WrongRepositoryFormatError Manifest file manager exception""" + + +class Manifest: + """Manifest wrapper""" + + default_manifest_name = "manifest.yml" + + def __init__(self, manifest_path: Optional[str] = None): + """ + :param manifest_path: Path to a manifest file + """ + self._manifest_file = Path(manifest_path or self.default_manifest_name) + if self._manifest_file.is_dir(): + self._manifest_file = self._manifest_file / self.default_manifest_name + + self._manifest_version = "1.0" + self._components: Dict[str, Component] = {} + + if manifest_path is not None: + self._prepare_manifest() + + def __repr__(self) -> str: + return str(self._manifest_file) + + def _prepare_manifest(self) -> None: + """Read manifest file and convert its data to objects""" + if not self._manifest_file.is_file(): + raise ManifestDoesNotExist(f'Cannot find manifest "{self._manifest_file}"') + + with self._manifest_file.open("r") as manifest: + manifest_info = yaml.safe_load(manifest) + + if not isinstance(manifest_info, dict): + raise ManifestDoesNotExist(f'Incorrect manifest "{self._manifest_file}"') + + self._manifest_version = manifest_info.get("manifest_version", self._manifest_version) + + for name, info in manifest_info["components"].items(): + self._components[name] = Component.from_dict({ + "name": name, + "version": info["version"], + "repository": info["repository"], + "product_type": info["product_type"], + "target_arch": info["target_arch"], + "build_type": info["build_type"], + "build_event": info["build_event"], + "custom_params": info.get("custom_params") + }) + + @property + def version(self) -> str: + return self._manifest_version + + @property + def components(self) -> List[Component]: + return list(self._components.values()) + + def get_component(self, component_name: 
str) -> Optional[Component]: + return self._components.get(component_name) + + def add_component(self, component: Component, replace: bool = False) -> bool: + if not replace and component.name in self._components: + return False + self._components[component.name] = component + return True + + def delete_component(self, component_name: str) -> bool: + return self._components.pop(component_name, None) is not None + + def save_manifest(self, save_to: Union[str, Path]) -> None: + class YamlDumper(yaml.SafeDumper): + """Formatting PyYAML dump() output""" + + def write_line_break(self, data=None): + super().write_line_break(data) + if len(self.indents) in {1, 2, 4}: + super().write_line_break() + + path_to_save = Path(save_to) + if path_to_save.is_dir(): + path_to_save = path_to_save / self.default_manifest_name + else: + path_to_save.parent.mkdir(parents=True, exist_ok=True) + + manifest_data = {"components": {}, "manifest_version": self._manifest_version} + for comp_name, comp_data in self._components.items(): + comp = dict(comp_data) + manifest_data["components"][comp_name] = { + "version": comp["version"], + "product_type": comp["product_type"], + "target_arch": comp["target_arch"], + "build_type": comp["build_type"], + "build_event": comp["build_event"], + "trigger_repo_name": comp["trigger_repo_name"], + "custom_params": comp["custom_params"], + "repository": comp["repositories"], + } + + try: + with path_to_save.open("w") as manifest: + yaml.dump(manifest_data, stream=manifest, Dumper=YamlDumper, default_flow_style=False, sort_keys=False) + except Exception as ex: + raise ManifestSavingError(ex) from ex + + def as_dict(self) -> Dict[str, Union[str, Dict]]: + """Return manifest as dictionary""" + if not self._manifest_file.is_file(): + raise ManifestDoesNotExist(f'Cannot find manifest "{self._manifest_file}"') + + with self._manifest_file.open("r") as manifest: + manifest_dict = yaml.safe_load(manifest) + + if not isinstance(manifest_dict, dict): + raise 
ManifestDoesNotExist(f'Incorrect manifest "{self._manifest_file}"') + + return manifest_dict + + +class Repository: + def __init__(self, **kwargs) -> None: + self._state: dict = { + "name": None, + "url": None, + "branch": None, + "revision": None, + "commit_id": None, + "commit_time": None, + "target_branch": None, + "target_revision": None, + "target_commit_id": None, + "merge_target": False, + "revert_time": None, + "trigger": False, + "default_branch": None, + "type": "git", + } + for arg_name, arg_value in kwargs.items(): + if arg_name in self._state: + self._state[arg_name] = arg_value + + def __getattr__(self, attr_name: str) -> Any: + if attr_name in self._state: + return self._state.get(attr_name) + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr_name}'") + + def __iter__(self) -> Iterator: + for name in self._state: + yield name, self._state.get(name) + + def get_git_repo_state(self) -> dict: + state = deepcopy(self._state) + state.pop("revision") + state.pop("target_revision") + state.pop("commit_time") + state.pop("type") + state["commit_id"] = self._state["revision"] + state["target_commit_id"] = self._state["target_revision"] + return state + + +class Component: + def __init__( + self, + name: str, + version: str, + repositories: list, + product_type: str, + target_arch: str, + build_type: str, + build_event: str, + custom_params: Optional[dict] = None + ): + """ + Initialize the product component. 
+ + :param name: Name of component + :param version: Version of component + :param repositories: List of repositories + :param product_type: Unique key to describe a product type (can include OS, arch, build variant, etc) + :param target_arch: Target architecture + :param build_type: Type of build (release, debug) + :param build_event: Build event (pre_commit, commit) + :param custom_params: Custom parameters (optional) + """ + self._name = name + self._version = version + self._repositories = {} + self._product_type = product_type + self._target_arch = target_arch + self._build_type = build_type + self._build_event = build_event + self._custom_params = custom_params if custom_params is not None else {} + self._trigger_repo_name = None + + self._prepare_repositories(repositories) + + def __iter__(self) -> Iterator: + yield "name", self._name + yield "version", self._version + yield "product_type", self._product_type + yield "target_arch", self._target_arch + yield "build_type", self._build_type + yield "build_event", self._build_event + yield "trigger_repo_name", self._trigger_repo_name + yield "custom_params", self._custom_params + yield "repositories", [dict(repo) for repo in self._repositories.values()] + + def _prepare_repositories(self, repositories: list) -> None: + for repo in repositories: + repo_name, repo_obj = self._parse_repository(repo) + self._repositories[repo_name] = repo_obj + + if repo_obj.trigger: + if self._trigger_repo_name: + raise WrongRepositoryFormatError( + f"Found trigger repo duplicates: {self._trigger_repo_name}, {repo_name}" + ) + self._trigger_repo_name = repo_name + + @staticmethod + def _parse_repository(repo: Union[dict, Repository]) -> tuple[str, Repository]: + if isinstance(repo, dict): + repo_name = repo["name"] + repo_obj = Repository(**repo) + elif isinstance(repo, Repository): + repo_name = repo.name + repo_obj = repo + return repo_name, repo_obj + + @staticmethod + def from_dict(comp_data: dict) -> Component: + """ + Convert 
a dictionary to a Component object. + + :param comp_data: Component data dictionary + :return: Component object + """ + try: + return Component( + comp_data["name"], + comp_data["version"], + comp_data["repository"], + comp_data["product_type"], + comp_data["target_arch"], + comp_data["build_type"], + comp_data["build_event"], + comp_data.get("custom_params"), + ) + except Exception as ex: + raise WrongComponentFormatError(ex) from ex + + @property + def name(self) -> str: + return self._name + + @property + def version(self) -> str: + return self._version + + @property + def product_type(self) -> str: + return self._product_type + + @property + def target_arch(self) -> str: + return self._target_arch + + @property + def build_type(self) -> str: + return self._build_type + + @property + def build_event(self) -> str: + return self._build_event + + @property + def repositories(self) -> List[Repository]: + return list(self._repositories.values()) + + @property + def trigger_repo_name(self) -> Optional[str]: + return self._trigger_repo_name + + @property + def trigger_repository(self) -> Optional[Repository]: + return next((repo for repo in self._repositories.values() if repo.trigger), None) + + def get_repository(self, repository_name: str) -> Optional[Repository]: + return self._repositories.get(repository_name) + + def add_repository(self, repository: Repository, replace: bool = False) -> bool: + if not replace and repository.name in self._repositories: + return False + self._repositories[repository.name] = repository + return True + + def delete_repository(self, repository_name: str) -> bool: + return self._repositories.pop(repository_name, None) is not None + + def get_custom_param(self, name: str) -> Optional[Any]: + return self._custom_params.get(name) + + def add_custom_param(self, name: str, value: Any) -> None: + self._custom_params[name] = value + + def delete_custom_param(self, name: str) -> bool: + return self._custom_params.pop(name, None) is not None 
diff --git a/.github/actions/create_manifest/requirements.txt b/.github/actions/create_manifest/requirements.txt new file mode 100644 index 00000000000000..eb0abf060b23b0 --- /dev/null +++ b/.github/actions/create_manifest/requirements.txt @@ -0,0 +1,2 @@ +GitPython~=3.1.43 +pyyaml~=6.0.1 diff --git a/.github/actions/store_artifacts/action.yml b/.github/actions/store_artifacts/action.yml new file mode 100644 index 00000000000000..d9c4184a622cce --- /dev/null +++ b/.github/actions/store_artifacts/action.yml @@ -0,0 +1,39 @@ +name: 'Store artifacts' +description: 'Store given artifacts in a proper place on a shared drive' +inputs: + artifacts: + description: "Multi-line list of artifacts to store" + required: true + storage_dir: + description: "Directory name to store artifacts in" + required: true + storage_root: + description: "Root path of the storage to place artifacts to" + required: true + + +outputs: + artifacts_storage_path: + description: "Path where the artifacts are stored" + value: ${{ steps.copy_artifacts.outputs.artifacts_storage_path }} + +runs: + using: "composite" + steps: + - name: Install Python dependencies + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + pip install -r ${{ env.ACTION_PATH }}/requirements.txt + env: + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} + + - name: 'Copy artifacts' + id: copy_artifacts + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + python ${{ env.ACTION_PATH }}/store_artifacts.py + --storage_dir "${{ inputs.storage_dir }}" --storage_root "${{ inputs.storage_root }}" + -a "${{ inputs.artifacts }}" + env: + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} diff --git a/.github/actions/store_artifacts/requirements.txt b/.github/actions/store_artifacts/requirements.txt new file mode 100644 index 00000000000000..eb0abf060b23b0 
--- /dev/null +++ b/.github/actions/store_artifacts/requirements.txt @@ -0,0 +1,2 @@ +GitPython~=3.1.43 +pyyaml~=6.0.1 diff --git a/.github/actions/store_artifacts/store_artifacts.py b/.github/actions/store_artifacts/store_artifacts.py new file mode 100644 index 00000000000000..7dde088dc91593 --- /dev/null +++ b/.github/actions/store_artifacts/store_artifacts.py @@ -0,0 +1,134 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import argparse +import logging +import os +import re +import sys +import git +import shutil +from contextlib import contextmanager +from pathlib import Path + + +def parse_args(): + parser = argparse.ArgumentParser(description='Returns product components changed in a given PR or commit') + parser.add_argument('-e', '--event_name', help='Name of GitHub event', required=False) + parser.add_argument('-b', '--branch_name', help='Name of GitHub branch', required=False) + parser.add_argument('-s', '--commit_sha', help='Commit hash for which artifacts were generated', required=False) + parser.add_argument('-a', '--artifacts', type=str, help='Paths to artifacts to store (files/dirs)', required=True) + parser.add_argument('--storage_dir', help='Directory name to store artifacts in', required=True) + parser.add_argument('--storage_root', help='Root path of the storage to place artifacts to', required=True) + args = parser.parse_args() + return args + + +def init_logger(): + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-15s %(levelname)-8s %(message)s', + datefmt='%m-%d-%Y %H:%M:%S') + + +def set_github_output(name: str, value: str, github_output_var_name: str = 'GITHUB_OUTPUT'): + """Sets output variable for a GitHub Action""" + logger = logging.getLogger(__name__) + # In an environment variable "GITHUB_OUTPUT" GHA stores path to a file to write outputs to + with open(os.environ.get(github_output_var_name), 'a+') as file: + logger.info(f"Add {name}={value} 
to {github_output_var_name}") + print(f'{name}={value}', file=file) + + +@contextmanager +def preserve_stats_context(): + """ + Workaround for copying to samba share on Linux + to avoid issues while setting Linux permissions. + """ + _orig_copystat = shutil.copystat + shutil.copystat = lambda x, y, follow_symlinks=True: x + try: + yield + finally: + shutil.copystat = _orig_copystat + + +def rotate_dir(directory: Path) -> bool: + """ + Renames directory if exists: + dir -> dir_1 + """ + log = logging.getLogger('rotate_dir') + + if not directory.exists(): + return False + + dir_parent = directory.parent + dir_name = directory.name + max_dir_num = 0 + for redir in dir_parent.iterdir(): + dir_num = redir.name.split('_')[-1] + if redir.name.startswith(dir_name) and dir_num.isdigit() and int(dir_num) > max_dir_num: + max_dir_num = int(dir_num) + + duplicate = dir_parent / f'{dir_name}_{max_dir_num + 1}' + log.info(f"Move previous directory to {duplicate}") + directory.rename(duplicate) + return True + + +def main(): + init_logger() + logger = logging.getLogger(__name__) + args = parse_args() + + event_name = args.event_name or os.getenv('GITHUB_EVENT_NAME') + branch_name = args.branch_name or os.getenv('GITHUB_BASE_REF') or os.getenv('GITHUB_REF_NAME') + + # TODO: return, once we decide to get rid of post-commit and choose artifacts generated for a merged PR in queue? 
+ # merge_queue_matcher = re.search(r'gh-readonly-queue/(.*?)/pr-', branch_name) + # if merge_queue_matcher: + # branch_name = merge_queue_matcher.group(1) + + commit_hash = args.commit_sha or os.getenv('PR_HEAD_SHA') or os.getenv('GITHUB_SHA') + event_type = 'pre_commit' if event_name == 'pull_request' else 'commit' + storage_root = args.storage_root or os.getenv('ARTIFACTS_SHARE') + + storage = Path(storage_root) / 'dldt' / branch_name / event_type / commit_hash / args.storage_dir + set_github_output("artifacts_storage_path", str(storage)) + + logger.info(f"Storing artifacts to {storage}") + rotate_dir(storage) # TODO: use more stable approach to handle storing artifacts from re-runs + + error_found = False + for artifact in args.artifacts.split(): + artifact_path = Path(artifact) + logger.debug(f"Copying {artifact_path} to {storage / artifact_path.name}") + try: + with preserve_stats_context(): + if artifact_path.is_dir(): + shutil.copytree(artifact_path, storage / artifact_path.name) + else: + storage.mkdir(parents=True, exist_ok=True) + shutil.copy2(artifact_path, storage / artifact_path.name) + except Exception as e: + logger.error(f'Failed to copy {artifact}: {e}') + error_found = True + + github_server = os.getenv('GITHUB_SERVER_URL') + if github_server: # If running from GHA context + # TODO: write an exact job link, but it's not trivial to get + workflow_link = f"{github_server}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + with open(storage / 'workflow_link.txt', 'w') as file: + file.write(workflow_link) + + logger.debug(f"Copying finished") + (storage / 'copying_finished').touch() + if error_found: + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/.github/components.yml b/.github/components.yml index 9c5d7c0089c9ca..8de51a2ced3343 100644 --- a/.github/components.yml +++ b/.github/components.yml @@ -111,6 +111,7 @@ IR_FE: ONNX_FE: revalidate: - MO + - OVC - ONNX_RT build: - CPU @@ -119,6 +120,7 @@ 
ONNX_FE: PDPD_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -126,6 +128,7 @@ PDPD_FE: TF_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -134,6 +137,7 @@ TF_FE: TFL_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -141,6 +145,7 @@ TFL_FE: PyTorch_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -148,6 +153,7 @@ PyTorch_FE: JAX_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -165,6 +171,7 @@ Python_API: revalidate: - samples - MO + - OVC - tools - TF_FE build: @@ -207,6 +214,18 @@ IE_Tests: build: - IR_FE +OVC: + revalidate: + - PyTorch_FE + - TF_FE + - TFL_FE + - ONNX_FE + - PDPD_FE + - JAX_FE + build: + - Python_API + - TOKENIZERS # TF_FE tests depends on tokenizers build + MO: revalidate: - PyTorch_FE diff --git a/.github/dependency_review.yml b/.github/dependency_review.yml index 11639f4d2d4b22..5636a441501fc8 100644 --- a/.github/dependency_review.yml +++ b/.github/dependency_review.yml @@ -13,6 +13,7 @@ allow-licenses: - '0BSD' - 'Python-2.0' - 'LGPL-3.0' + - 'MPL-2.0' fail-on-scopes: - 'runtime' - 'development' diff --git a/.github/github_org_control/config.json b/.github/github_org_control/config.json index 717403f27d13ea..7fc23b7888c170 100644 --- a/.github/github_org_control/config.json +++ b/.github/github_org_control/config.json @@ -37,6 +37,7 @@ "openvino-onnx-frontend-maintainers": "category: ONNX FE", "openvino-ie-tests-maintainers": "category: IE Tests", "openvino-mo-maintainers": "category: MO", + "openvino-ovc-maintainers": "category: OVC", "openvino-ngraph-maintainers": "category: Core", "openvino-scripts-maintainers": "category: build", "openvino-tests-maintainers": "category: IE Tests", diff --git a/.github/labeler.yml b/.github/labeler.yml index 64a8661cf1e2e8..49aeac7325aa4f 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -97,8 +97,10 @@ 'category: MO': - 'tools/mo/**/*' -- 'tools/ovc/**/*' - 'tests/layer_tests/mo_python_api_tests/**/*' + +'category: OVC': +- 'tools/ovc/**/*' - 
'tests/layer_tests/ovc_python_api_tests/**/*' 'category: ONNX FE': diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json index 51e8106944ca9c..3d59bb9a1e569f 100644 --- a/.github/scripts/workflow_rerun/errors_to_look_for.json +++ b/.github/scripts/workflow_rerun/errors_to_look_for.json @@ -58,5 +58,9 @@ { "error_text": "status_string: \"Timeout was reached\"", "ticket": 142653 + }, + { + "error_text": "ERROR 502: Bad Gateway", + "ticket": 146254 } ] \ No newline at end of file diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index de5b6c0011e34d..25081433f48f10 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -175,7 +175,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 66d4c8067edea9..b590d093207e39 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -72,13 +72,13 @@ jobs: echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - name: 'Upload sphinx.log' - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: sphinx_build_log_${{ env.PR_NUMBER }}.log path: build/docs/sphinx.log - name: 'Upload docs html' - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_docs_html_${{ env.PR_NUMBER }}.zip path: build/docs/openvino_docs_html.zip @@ -95,7 +95,7 @@ jobs: - name: 'Upload test results' if: failure() - uses: 
actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_docs_pytest path: build/docs/_artifacts/ diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 223a7418156e43..f43e60538a6089 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -31,7 +31,7 @@ jobs: - name: suggester / clang-format if: startsWith(github.event_name, 'pull_request') - uses: reviewdog/action-suggester@a1d57ff096639094e0ba35ef3039e79316364796 # v1.15.0 + uses: reviewdog/action-suggester@63b8f8cc21dfa052ac44436e65ed31edcffcb6c1 # v1.17.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} level: warning @@ -60,7 +60,7 @@ jobs: # always provide suggestions even for skipped scripts in ov_shellcheck tagret - name: ShellCheck action if: always() - uses: reviewdog/action-shellcheck@52f34f737a16c65b8caa8c51ae1b23036afe5685 # v1.23.0 + uses: reviewdog/action-shellcheck@d99499e855260c9c56f7a1d066933b57326e9e7c # v1.26.0 with: level: style reporter: github-pr-review diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ef0fd80e7813cf..da790552c239c1 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -139,7 +139,7 @@ jobs: run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-configure -c ${COVERITY_TOOL_DIR}/cov-analysis-linux64-2023.6.2/config/coverity_config.xml -lscc text - name: Upload Coverity build log - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: coverity_logs @@ -147,7 +147,7 @@ jobs: if-no-files-found: 'error' - name: Upload Coverity build archive - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() 
with: name: coverity_archive diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index 02cd0abf018319..5833c1d2000fa7 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -189,7 +189,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -198,7 +198,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -206,7 +206,7 @@ jobs: - name: Upload openvino RPM packages if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_rpm_packages path: ${{ env.BUILD_DIR }}/*.rpm @@ -214,7 +214,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -234,7 +234,7 @@ jobs: steps: - name: Download OpenVINO RPM packages - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_rpm_packages path: ${{ env.RPM_PACKAGES_DIR }} diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 04fa0c8860ab66..986c2c42315371 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ 
b/.github/workflows/job_cpu_functional_tests.yml @@ -33,13 +33,13 @@ jobs: PARALLEL_TEST_CACHE: ${{ github.workspace }}/install/tests/test_cache.lst steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -108,7 +108,7 @@ jobs: key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 92c12dfcd71251..29c656f416ecbc 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -35,13 +35,13 @@ jobs: INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -255,7 +255,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 
--gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git a/.github/workflows/job_debian_packages.yml b/.github/workflows/job_debian_packages.yml index a7547a2483dd16..a8f2731563f779 100644 --- a/.github/workflows/job_debian_packages.yml +++ b/.github/workflows/job_debian_packages.yml @@ -33,7 +33,7 @@ jobs: run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - name: Download OpenVINO debian packages - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_debian_packages path: ${{ env.DEBIAN_PACKAGES_DIR }} diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 7a5af97cdcde49..5d9fb1172e62cb 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -38,13 +38,13 @@ jobs: GTEST_PARALLEL_SCRIPT: ${{ github.workspace }}/gtest_parallel.py steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: 'openvino_package' path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: 'openvino_tests' path: ${{ env.INSTALL_TEST_DIR }} @@ -128,7 +128,7 @@ jobs: - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: 
actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: test-results-${{ inputs.test_type }}-${{ inputs.device }} diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 3fac0998d88ced..19bf3b23482b89 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -38,13 +38,13 @@ jobs: if: ${{ github.event_name != 'merge_group' }} steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index ae0f21bf58ab37..b7da6d827d542d 100644 --- a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -43,7 +43,7 @@ jobs: ONNX_RUNTIME_BUILD_DIR: ${{ github.workspace }}/onnxruntime/build steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} diff --git a/.github/workflows/job_openvino_js.yml b/.github/workflows/job_openvino_js.yml index 25e29dd3f3f9c3..880726bd0d5878 100644 --- a/.github/workflows/job_openvino_js.yml +++ b/.github/workflows/job_openvino_js.yml @@ -45,7 +45,7 @@ jobs: echo "OPENVINO_JS_LIBS_DIR=$GITHUB_WORKSPACE/openvino/src/bindings/js/node/bin" >> "$GITHUB_ENV" - name: Download OpenVINO JS package - uses: 
actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_js_package path: ${{ env.OPENVINO_JS_LIBS_DIR }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 4c7a14e891b49e..e1bd58fb781d69 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -41,13 +41,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -306,7 +306,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-python diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index c740cd89079ec2..b910d9242647b1 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -49,19 +49,19 @@ jobs: fi - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tokenizers extension - uses: 
actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tokenizers_wheel path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -134,7 +134,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly_scope1' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_tests.html --self-contained-html -v -n 4 -k "TestTimmConvertModel or TestTorchHubConvertModel" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_${{ inputs.model_scope }}_tests.html --self-contained-html -v -n 2 -k "TestTimmConvertModel or TestTorchHubConvertModel" env: TYPE: ${{ inputs.model_scope == 'precommit' && 'precommit' || 'nightly' }} TEST_DEVICE: CPU @@ -144,7 +144,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly_scope2' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel)" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_${{ inputs.model_scope }}_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel)" env: TYPE: ${{ inputs.model_scope == 'precommit' && 'precommit' || 'nightly' }} TEST_DEVICE: CPU @@ -155,7 +155,7 @@ jobs: if: ${{ 
inputs.model_scope == 'precommit' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 4 + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 2 env: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False @@ -180,10 +180,10 @@ jobs: df -h - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: - name: test-results-torch-models + name: test-results-torch-models-${{ inputs.model_scope == 'precommit' }} path: | ${{ env.INSTALL_TEST_DIR }}/TEST-torch* if-no-files-found: 'error' diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index e453210d58b13b..2fce9965e36b6c 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -34,13 +34,13 @@ jobs: BUILD_DIR: ${{ github.workspace }}/build steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 
168d9bf61308d7..9c2392093ab446 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -44,19 +44,19 @@ jobs: LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} - name: Download OpenVINO tokenizers extension - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tokenizers_wheel path: ${{ env.INSTALL_DIR }} @@ -158,7 +158,7 @@ jobs: TEST_PRECISION: FP16 - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-python-tf-layers diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index 1a452c94db0ace..ab8163139e4a2b 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -37,19 +37,19 @@ jobs: NUMBER_OF_REPLICAS: 2 steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tokenizers 
extension - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tokenizers_wheel path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -114,7 +114,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-tensorflow-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 9cf1acc05e7220..e1ef48b14ee7d9 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -73,7 +73,7 @@ jobs: ref: 'master' - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} @@ -137,7 +137,7 @@ jobs: - name: Upload openvino tokenizers wheel if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tokenizers_wheel path: ${{ env.EXTENSION_BUILD_DIR }}/*.whl diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index da0309e20b37bd..80ad7ffa92c4f1 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -109,6 +109,9 @@ jobs: BUILD_DIR: /__w/openvino/openvino/openvino_build SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release 
ONNX_RUNTIME_UTILS: /__w/openvino/openvino/openvino/src/frontends/onnx/tests/ci_utils/onnxruntime + ARTIFACTS_SHARE: "/mount/build-artifacts" + MANIFEST_PATH: '/__w/openvino/openvino/manifest.yml' + PRODUCT_TYPE: 'public_linux_ubuntu_20_04_release' if: "!needs.smart_ci.outputs.skip_workflow" steps: @@ -135,6 +138,18 @@ jobs: submodules: 'true' ref: 'master' + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./openvino/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + ${{ env.OPENVINO_CONTRIB_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: 'intel64' + build_type: 'release' + save_to: ${{ env.MANIFEST_PATH }} + # # Print system info # @@ -254,7 +269,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -263,7 +278,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -271,7 +286,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -279,7 +294,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_developer_package 
path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -287,7 +302,7 @@ jobs: - name: Upload openvino debian packages if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -295,12 +310,34 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz if-no-files-found: 'error' + - name: Prepare debian packages for storage on share + if: ${{ always() }} + continue-on-error: true + run: | + pushd ${{ env.BUILD_DIR }} + mkdir deb && mv *.deb deb/ + popd + + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + ${{ env.BUILD_DIR }}/deb + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + Debian_Packages: name: Debian Packages needs: Build @@ -360,13 +397,13 @@ jobs: # - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -425,7 +462,7 @@ jobs: 
- name: Upload Conformance Artifacts if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-${{ env.TEST_DEVICE }} path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -451,7 +488,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ matrix.TEST_TYPE == 'API' }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-TEMPLATE path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -553,7 +590,7 @@ jobs: # - /mount:/mount PyTorch_Models_Tests: name: PyTorch Models tests - if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test + if: ${{ github.event_name != 'schedule' && fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test }} needs: [ Build, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_pytorch_models_tests.yml with: @@ -612,13 +649,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO Developer package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_developer_package path: ${{ env.INSTALL_DIR }} diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 825a4b076d631d..d38eda93e7d2b8 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -104,6 +104,9 @@ jobs: BUILD_DIR: 
/__w/openvino/openvino/openvino_build SCCACHE_AZURE_KEY_PREFIX: 'ubuntu20_aarch64_Release' ONNX_RUNTIME_UTILS: /__w/openvino/openvino/openvino/src/frontends/onnx/tests/ci_utils/onnxruntime + ARTIFACTS_SHARE: "/mount/build-artifacts" + MANIFEST_PATH: '/__w/openvino/openvino/manifest.yml' + PRODUCT_TYPE: 'public_linux_ubuntu_20_04_arm64_release' if: "!needs.smart_ci.outputs.skip_workflow" steps: @@ -121,6 +124,18 @@ jobs: submodules: 'true' ref: 'master' + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./openvino/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + ${{ env.OPENVINO_CONTRIB_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: 'aarch64' + build_type: 'release' + save_to: ${{ env.MANIFEST_PATH }} + # # Print system info # @@ -167,7 +182,7 @@ jobs: -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ - -DOV_CPU_AARCH64_USE_MULTI_ISA=OFF \ + -DOV_CPU_AARCH64_USE_MULTI_ISA=ON \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} @@ -247,7 +262,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -256,7 +271,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -264,7 +279,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: 
actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_developer_package path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -272,7 +287,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -280,7 +295,7 @@ jobs: - name: Upload openvino debian packages if: ${{ 'false' }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -288,12 +303,25 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz if-no-files-found: 'error' + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + Debian_Packages: name: Debian Packages needs: Build diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index cdae9c77af9b40..cfccad5fe23e12 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -220,7 +220,7 @@ jobs: # Upload build 
artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -229,7 +229,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -237,7 +237,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.tar.gz @@ -245,7 +245,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -293,7 +293,7 @@ jobs: ref: 'master' - name: Download selective build statistics package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_selective_build_stat path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 6f089f205d3b1d..5227eb3eacdac9 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -186,7 +186,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: 
actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -194,7 +194,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -230,13 +230,13 @@ jobs: run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: ${{ format('openvino_package_{0}', matrix.SANITIZER) }} path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: ${{ format('openvino_tests_{0}', matrix.SANITIZER) }} path: ${{ env.INSTALL_TEST_DIR }} @@ -462,7 +462,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 32f5474d14ce76..3880f8333f18c5 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -204,7 +204,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: 
actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -212,7 +212,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -220,7 +220,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 26eb440eb87cb2..8386f54719b02c 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -204,7 +204,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -212,7 +212,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -220,7 +220,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package 
path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index 3a9b23ea2685db..7bd7fe3d840222 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -49,7 +49,7 @@ jobs: git diff > samples_diff.diff working-directory: samples/python - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: failure() with: name: samples_diff @@ -67,7 +67,7 @@ jobs: git diff > pyopenvino_diff.diff working-directory: src/bindings/python/src/openvino - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: failure() with: name: pyopenvino_diff @@ -85,7 +85,7 @@ jobs: git diff > wheel_diff.diff working-directory: src/bindings/python/wheel - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: failure() with: name: wheel_diff diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0e49752fc92968..26ed3615ea973a 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -64,6 +64,9 @@ jobs: INSTALL_DIR_JS: "${{ github.workspace }}\\openvino_install\\js" INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" BUILD_DIR: "${{ github.workspace }}\\openvino_build" + ARTIFACTS_SHARE: "C:\\mount\\build-artifacts" + MANIFEST_PATH: "${{ github.workspace }}\\manifest.yml" + PRODUCT_TYPE: 'public_windows_vs2019_release' # TODO: specify version of compiler here if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }} @@ -81,6 +84,27 @@ jobs: path: 'openvino_contrib' ref: 'master' + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ 
env.PYTHON_VERSION }} + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' + show-cache-info: 'true' + + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./openvino/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + ${{ env.OPENVINO_CONTRIB_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: 'intel64' + build_type: 'release' + save_to: ${{ env.MANIFEST_PATH }} + # # Print system info # @@ -92,15 +116,6 @@ jobs: # Dependencies # - - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ env.PYTHON_VERSION }} - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - show-cache-info: 'true' - - name: Install python dependencies run: | # For Python API: build and wheel packaging @@ -219,14 +234,14 @@ jobs: # - name: Upload openvino package - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.zip if-no-files-found: 'error' - name: Upload openvino tests package - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -234,12 +249,24 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} if-no-files-found: 'error' + - name: Store artifacts to a shared drive + id: store_artifacts 
+ if: ${{ always() }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.zip + ${{ env.BUILD_DIR }}/openvino_tests.zip + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + Samples: needs: [ Build, Smart_CI ] if: fromJSON(needs.smart_ci.outputs.affected_components).samples @@ -257,13 +284,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -351,7 +378,7 @@ jobs: path: 'openvino' - name: Download OpenVINO js package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_js_package path: ${{ env.OPENVINO_JS_LIBS_DIR }} @@ -422,13 +449,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -580,7 +607,7 @@ jobs: run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR 
}}/TEST-OpenVinoConversion.xml - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-python @@ -610,13 +637,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -786,7 +813,7 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-cpp @@ -810,13 +837,13 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -870,7 +897,7 @@ jobs: key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ 
github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 977f9aee91bcce..963a6edb37a56a 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -247,7 +247,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip @@ -255,7 +255,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -292,7 +292,7 @@ jobs: ref: 'master' - name: Download selective build statistics package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_selective_build_stat path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} @@ -355,7 +355,7 @@ jobs: steps: - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -397,7 +397,7 @@ jobs: timeout-minutes: 60 - name: Upload Test Results - uses: 
actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/cmake/toolchains/onecoreuap.toolchain.cmake b/cmake/toolchains/onecoreuap.toolchain.cmake index af4285ee124117..b9c71254e09c15 100644 --- a/cmake/toolchains/onecoreuap.toolchain.cmake +++ b/cmake/toolchains/onecoreuap.toolchain.cmake @@ -50,8 +50,12 @@ endif() unset(_onecoreuap_arch) # compile flags +if(CMAKE_GENERATOR MATCHES "Ninja") + set(includes "/I\"\$\$\(UniversalCRT_IncludePath\)\"") +else() + set(includes "/I\"\$\(UniversalCRT_IncludePath\)\"") +endif() -set(includes "/I\"\$\(UniversalCRT_IncludePath\)\"") set(CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS_INIT} ${includes}") set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} ${includes}") unset(includes) diff --git a/docs/articles_en/about-openvino.rst b/docs/articles_en/about-openvino.rst index a9b599960d2e2b..dbe5f6d3c1061f 100644 --- a/docs/articles_en/about-openvino.rst +++ b/docs/articles_en/about-openvino.rst @@ -1,5 +1,3 @@ -.. {#about_openvino} - About OpenVINO ============== @@ -10,6 +8,7 @@ About OpenVINO about-openvino/performance-benchmarks about-openvino/compatibility-and-support + about-openvino/contributing Release Notes OpenVINO is a toolkit for simple and efficient deployment of various deep learning models. diff --git a/docs/articles_en/about-openvino/contributing.rst b/docs/articles_en/about-openvino/contributing.rst new file mode 100644 index 00000000000000..f14e5f58249259 --- /dev/null +++ b/docs/articles_en/about-openvino/contributing.rst @@ -0,0 +1,169 @@ +Contribute to OpenVINO +======================== + +.. toctree:: + :maxdepth: 1 + :hidden: + + contributing/code-contribution-guide + +OpenVINO™ is always looking for opportunities to improve and your contributions +play a big role in this process. 
Here are four ways you can make OpenVINO better: + +- `Provide feedback <#provide-feedback>`__ +- `Contribute code changes <#contribute-code-changes>`__ +- `Improve documentation <#improve-documentation>`__ +- `Promote and support OpenVINO <#promote-and-support-openvino>`__ + + +:fas:`comments` Provide feedback +################################ + +.. rubric:: Report bugs / issues + :name: report-bugs-issues + +If you notice unexpected behavior in OpenVINO or its components, you can +`create a new issue `__ +in the GitHub issue tracker. + +.. rubric:: Propose improvements + :name: propose-improvements + +If you want to share your ideas for improving OpenVINO: + +- Open a new `GitHub Discussion `__. +- Create a `Feature Request Issue `__ + if your idea is already well defined. + +In both cases, provide a detailed description and list potential use cases, +benefits, and challenges. Keep in mind that even if your input is not immediately +prioritized, it may be used at a later time or undertaken by the community. + + +:fas:`code-branch` Contribute code changes +########################################## + +Always check if the change is still needed! Verify if +`the issue `__ or +`request `__ is still open +and nobody has started working on it. If the ticket is already work in progress, +you can always ask if you can help. + +**Address only the issues that affect the master or** +:doc:`LTS release branches <./release-notes-openvino/release-policy>`. + +**Do not start work on contributions if a proper issue/request has not been created.** + +.. tip:: + + If you want to start with something simple, check out + `first-time contributions `__. + + +.. rubric:: Fix bugs + :name: fix-bugs + +Choose one of the issues reported in +`GitHub Issue Tracker `__ and +`create a Pull Request `__ +(PR) addressing it. + +If you find a new bug and want to fix it, you should still +create a new issue before working on the PR. This way, it will be easier for other +developers to track changes. 
+ +.. rubric:: Develop new features + :name: develop-new-features + +If you find a `Feature Request `__ +you want to work on, make sure it is clearly defined. If you have any doubts, +or the change is complex, `discuss it `__ +with OpenVINO developers first. + +If you have an idea for a new feature and want +to develop it, you should still create a Feature Request before working on the +PR. This way, it will be easier for other developers to track changes. + +.. rubric:: Develop a new device plugin + :name: develop-new-device-plugin + +If you want to run inference on a device that is currently not supported, you +can see how to develop a new plugin for it in the +`Plugin Developer Guide `__. + + +:fas:`file-alt` Improve documentation +##################################### + +OpenVINO user documentation is built from several sources, mainly the files in +the `docs/articles_en `__ +folder, using `Sphinx `__ and the +`reStructuredText `__ +markup language. + +OpenVINO `developer documentation `__ +is available only in markdown in the `docs/dev `__ +folder. + +To edit docs, consider using the Editor’s +`guide `__ +and contacting `documentation maintainers `__, +who will help you with information architecture and formatting, as well as +review, adjust, and merge the PR. + +.. rubric:: Review user documentation + :name: review-user-documentation + +In most cases, creating a PR is enough to correct a documentation mistake, improve +the language, and update or extend the information. For your convenience, the +top-right panel of most pages includes the “Edit on GitHub” button that will +take you to the source file of the given article. + +.. rubric:: Write new content + :name: write-new-content + +For more extensive changes in docs, reach out to any of the +`documentation maintainers `__ +to discuss the new content. + + +:fas:`bullhorn` Promote and support OpenVINO +############################################ + +.. 
rubric:: Popularize OpenVINO + :name: popularize-openvino + +Articles, tutorials, blog posts, demos, videos, and any other involvement in the +OpenVINO community is more than welcome. If you discuss or present OpenVINO on +various social platforms, you are raising awareness of the product among AI +enthusiasts and enabling other people to discover the toolkit. + +Feel free to reach out to OpenVINO developers if you need help with making a +contribution. You can also contact +`documentation maintainers `__ +, if you need help with visuals, brand materials, or content creation in general. + +.. rubric:: Help other community members + :name: help-community + +If you are an experienced OpenVINO user and want to help, you can share your +expertise with the community at any time. Check GitHub +`Discussions `__ +and `Issues `__ to see if +you can help someone. + +.. note:: + + By contributing to the OpenVINO project, you agree that your contributions + will be licensed under `the terms of the OpenVINO repository `__. + + +Additional Resources +##################### + +- :doc:`Code Contribution Guide <./contributing/code-contribution-guide>` +- Choose a `"Good First Issue" `__. +- Learn more about `OpenVINO architecture `__. +- Check out a `blog post on contributing to OpenVINO `__. +- Visit `Intel DevHub Discord server `__ to join + discussions and talk to OpenVINO developers. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/contributing/code-contribution-guide.rst b/docs/articles_en/about-openvino/contributing/code-contribution-guide.rst new file mode 100644 index 00000000000000..a74bb586e18130 --- /dev/null +++ b/docs/articles_en/about-openvino/contributing/code-contribution-guide.rst @@ -0,0 +1,88 @@ +Code Contribution Guide +======================= + +This section will start you off with a few simple steps to begin your code contribution. +If you have any doubts, talk to +`the development team `__. 
+Remember, your questions help us keep improving OpenVINO. + + +1. **Choose the issue you want to work on.** + + Choose one of the existing `issues `__ / + requests. The `“Good First Issue” `__ + board is a good place to start. If you have a new idea for the contribution, + make sure to first create a proper issue, discussion, or feature request. + + Here are some of the components you may choose to work on. + + .. tab-set:: + + .. tab-item:: APIs + + - `Core C++ API `__ + - `C API `__ + - `Python API `__ + - `JavaScript (Node.js) API `__ + + .. tab-item:: Frontends + + - `IR Frontend `__ + - `ONNX Frontend `__ + - `PaddlePaddle Frontend `__ + - `PyTorch Frontend `__ + - `TensorFlow Frontend `__ + - `TensorFlow Lite Frontend `__ + + .. tab-item:: Plugins + + - `Auto plugin `__ + - `CPU plugin `__ + - `GPU plugin `__ + - `NPU plugin `__ + - `Hetero plugin `__ + - `Template plugin `__ + + .. tab-item:: Tools + + - `Benchmark Tool `__ + - `Model Conversion `__ + +2. **Assign yourself to the issue.** + + To get assigned to a task, simply leave a comment with the ``.take`` command in + the selected issue. You can always ask OpenVINO developers for guidance, + both technical and organizational: + + - assign users in the **“Contact points”** section, + - visit `Intel DevHub Discord server `__ to ask + questions in the channel dedicated to **“Good First Issue”** support, or any other. + +3. **Build OpenVINO.** + + In order to build OpenVINO, follow the + `build instructions for your specific OS `__. + + Use the local build and the information found in the issue description to + develop your contribution. + +4. **Submit a PR with your changes.** + + Follow the `guidelines `__ + and do not forget to `link your Pull Request to the issue `__ + it addresses. + +5. **Wait for a review.** + + We will make sure to review your **Pull Request** as soon as possible and provide feedback. 
+ You can expect a merge once your changes have been validated with automatic tests and + approved by `maintainers `__. + + +Additional Resources +##################### + +- Choose a `“Good First Issue” `__. +- Learn more about `OpenVINO architecture `__. +- Check out a `blog post on contributing to OpenVINO `__. +- Visit `Intel DevHub Discord server `__ to join discussions and talk to OpenVINO developers. \ No newline at end of file diff --git a/docs/articles_en/assets/images/quantization_scheme.svg b/docs/articles_en/assets/images/quantization_scheme.svg new file mode 100644 index 00000000000000..b58934ec08e57d --- /dev/null +++ b/docs/articles_en/assets/images/quantization_scheme.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d197730e090d582d7ae1f68d139564b845bba5eb9aa168437c2b80f53545e706 +size 100328 diff --git a/docs/articles_en/assets/snippets/main.py b/docs/articles_en/assets/snippets/main.py index a063a1645f0ec1..4d5429cd4b7925 100644 --- a/docs/articles_en/assets/snippets/main.py +++ b/docs/articles_en/assets/snippets/main.py @@ -9,7 +9,7 @@ from contextlib import redirect_stdout, redirect_stderr -skip_snippets = ["main.py", "__init__.py", "utils.py", "ov_common.py"] +skip_snippets = ["main.py", "__init__.py", "utils.py", "ov_common.py", "ov_stateful_model_intro.py"] def import_python_modules(directory, subdirectory=""): for item in os.listdir(directory): diff --git a/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp new file mode 100644 index 00000000000000..75eb50839ca117 --- /dev/null +++ b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +int main() { + ov::Core core; + auto model = core.read_model("model.xml"); + auto input = model->get_parameters().at(0); + + auto compiled_model = 
core.compile_model(model, "NPU"); + auto npu_context = compiled_model.get_context().as(); + + auto in_element_type = input->get_element_type(); + auto in_shape = input->get_shape(); + + { + //! [default_context_from_core] + auto npu_context = core.get_default_context("NPU").as(); + // Extract raw level zero context handle from RemoteContext + void* context_handle = npu_context.get(); + //! [default_context_from_core] + } + + { + //! [default_context_from_model] + auto npu_context = compiled_model.get_context().as(); + // Extract raw level zero context handle from RemoteContext + void* context_handle = npu_context.get(); + //! [default_context_from_model] + } + + { + //! [wrap_nt_handle] + void* shared_buffer = nullptr; // create the NT handle + auto remote_tensor = npu_context.create_tensor(in_element_type, in_shape, shared_buffer); + //! [wrap_nt_handle] + } + + { + //! [wrap_dmabuf_fd] + int32_t fd_heap; // create the DMA-BUF System Heap file descriptor + auto remote_tensor = npu_context.create_tensor(in_element_type, in_shape, fd_heap); + //! [wrap_dmabuf_fd] + } + + { + //! [allocate_remote_level_zero_host] + auto remote_tensor = npu_context.create_l0_host_tensor(in_element_type, in_shape); + // Extract raw level zero pointer from remote tensor + void* level_zero_ptr = remote_tensor.get(); + //! [allocate_remote_level_zero_host] + } + + { + //! [allocate_level_zero_host] + auto tensor = npu_context.create_host_tensor(in_element_type, in_shape); + // Extract raw level zero pointer from remote tensor + void* level_zero_ptr = tensor.data(); + //! [allocate_level_zero_host] + } + + return 0; +} diff --git a/docs/articles_en/assets/snippets/ov_caching.cpp b/docs/articles_en/assets/snippets/ov_caching.cpp index cefb3da55c7827..891d3e9368292d 100644 --- a/docs/articles_en/assets/snippets/ov_caching.cpp +++ b/docs/articles_en/assets/snippets/ov_caching.cpp @@ -1,10 +1,10 @@ #include +//! 
[ov:caching:part0] void part0() { std::string modelPath = "/tmp/myModel.xml"; - std::string device = "GPU"; + std::string device = "GPU"; // For example: "CPU", "GPU", "NPU". ov::AnyMap config; -//! [ov:caching:part0] ov::Core core; // Step 1: create ov::Core object core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching auto model = core.read_model(modelPath); // Step 2: Read Model diff --git a/docs/articles_en/assets/snippets/ov_caching.py b/docs/articles_en/assets/snippets/ov_caching.py index c03e8b34cfe9ce..4ce0b91ccd7506 100644 --- a/docs/articles_en/assets/snippets/ov_caching.py +++ b/docs/articles_en/assets/snippets/ov_caching.py @@ -8,6 +8,7 @@ import openvino.properties as props +# For example: "CPU", "GPU", "NPU". device_name = 'CPU' model_path = get_path_to_model() path_to_cache_dir = get_temp_dir() diff --git a/docs/articles_en/assets/snippets/ov_stateful_model_intro.py b/docs/articles_en/assets/snippets/ov_stateful_model_intro.py new file mode 100644 index 00000000000000..f9e84b2891a7a3 --- /dev/null +++ b/docs/articles_en/assets/snippets/ov_stateful_model_intro.py @@ -0,0 +1,210 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging as log +import numpy as np + +import openvino as ov +from openvino.runtime import opset13 as ops +from openvino.runtime.op.util import VariableInfo, Variable +from openvino.runtime.passes import LowLatency2, MakeStateful, Manager +from openvino.runtime.utils import replace_node + + +def state_model_example(): + #! [ov:stateful_model] + input = ops.parameter([1, 1], dtype=np.float32, name="data") + init_const = ops.constant([[0]], dtype=np.float32) + + # Typically ReadValue/Assign operations are presented as pairs in models. + # ReadValue operation reads information from an internal memory buffer, Assign operation writes data to this buffer. + # For each pair, its own Variable object must be created. 
+ # Variable defines name, shape and type of the buffer. + var_info = VariableInfo() + var_info.data_shape = init_const.get_shape() + var_info.data_type = init_const.get_element_type() + var_info.variable_id = "variable0" + variable = Variable(var_info) + + # Creating Model + read = ops.read_value(init_const, variable) + add = ops.add(input, read) + assign = ops.assign(add, variable) + result = ops.result(add) + model = ov.Model(results=[result], sinks=[assign], parameters=[input], name="model") + #! [ov:stateful_model] + + return model + + +def low_latency_2_example(): + #! [ov:low_latency_2] + # Precondition for Model. + # TensorIterator and Parameter are created in body of TensorIterator with names + tensor_iterator_name = "TI_name" + body_parameter_name = "body_parameter_name" + idx = "0" # this is a first variable in the model + + # The State will be named "TI_name/param_name/variable_0" + state_name = tensor_iterator_name + "//" + body_parameter_name + "//" + "variable_" + idx + + #! [ov:get_ov_model] + core = ov.Core() + ov_model = core.read_model("path_to_the_model") + #! [ov:get_ov_model] + + # reshape input if needed + + #! [ov:reshape_ov_model] + ov_model.reshape({"X": ov.PartialShape([1, 1, 16])}) + #! [ov:reshape_ov_model] + + #! [ov:apply_low_latency_2] + manager = Manager() + manager.register_pass(LowLatency2()) + manager.run_passes(ov_model) + #! [ov:apply_low_latency_2] + + compied_model = core.compile_model(ov_model) + # Try to find the Variable by name + infer_request = compied_model.create_infer_request() + states = infer_request.query_state() + for state in states: + name = state.get_name() + if (name == state_name): + # some actions + #! [ov:low_latency_2] + pass + + #! [ov:low_latency_2_use_parameters] + manager.register_pass(LowLatency2(False)) + #! [ov:low_latency_2_use_parameters] + + +def replace_non_reshapable_const(): + #! [ov:replace_const] + # OpenVINO example. 
How to replace a Constant with hardcoded values of shapes in the model with another one with the new values. + # Assume we know which Constant (const_with_hardcoded_shape) prevents the reshape from being applied. + # Then we can find this Constant by name in the model and replace it with a new one with the correct shape. + core = ov.Core() + model = core.read_model("path_to_model"); + # Creating the new Constant with a correct shape. + # For the example shown in the picture above, the new values of the Constant should be 1, 1, 10 instead of 1, 49, 10 + new_const = ops.constant( """value_with_correct_shape, type""") + for node in model.get_ops(): + # Trying to find the problematic Constant by name. + if node.get_friendly_name() != "name_of_non_reshapable_const": + continue + # Replacing the problematic Constant with a new one. Do this for all the problematic Constants in the model, then + # you can apply the reshape feature. + replace_node(node, new_const) + + #! [ov:replace_const] + + +def apply_make_stateful_tensor_names(): + #! [ov:make_stateful_tensor_names] + core = ov.Core() + ov_model = core.read_model("path_to_the_model") + tensor_names = {"tensor_name_1": "tensor_name_4", + "tensor_name_3": "tensor_name_6"} + manager = Manager() + manager.register_pass(MakeStateful(tensor_names)) + manager.run_passes(ov_model) + #! [ov:make_stateful_tensor_names] + + +def apply_make_stateful_ov_nodes(): + #! [ov:make_stateful_ov_nodes] + core = ov.Core() + ov_model = core.read_model("path_to_the_model") + # Parameter_1, Result_1, Parameter_3, Result_3 are + # ops.parameter/ops.result in the ov_model + pairs = ["""(Parameter_1, Result_1), (Parameter_3, Result_3)"""] + manager = Manager() + manager.register_pass(MakeStateful(pairs)) + manager.run_passes(ov_model) + #! [ov:make_stateful_ov_nodes] + + +def main(): + + #! [ov:state_api_usage] + # 1. Load inference engine + log.info("Loading OpenVINO") + core = ov.Core() + + # 2. 
Read a model + log.info("Loading model files") + model = core.read_model("path_to_ir_xml_from_the_previous_section"); + model.get_parameters()[0].set_layout("NC"); + ov.set_batch(model, 1); + + # 3. Load the model to CPU + compiled_model = core.compile_model(model, "CPU") + + # 4. Create Infer Request + infer_request = compiled_model.create_infer_request() + + # 5. Reset memory states before starting + states = infer_request.query_state() + + if len(states) != 1: + log.error(f"Invalid queried state number. Expected 1, but got {str(states.size())}") + return -1 + + infer_request.reset_state() + + # 6. Inference + input_data = np.arange(start=1, stop=12, dtype=np.float32) + + # This example demonstrates how to work with OpenVINO State API. + # Input_data: some array with 12 float numbers + + # Part1: read the first four elements of the input_data array sequentially. + # Expected output for the first utterance: + # sum of the previously processed elements [ 1, 3, 6, 10] + + # Part2: reset state value (set to 0) and read the next four elements. + # Expected output for the second utterance: + # sum of the previously processed elements [ 5, 11, 18, 26] + + # Part3: set state value to 5 and read the next four elements. 
+ # Expected output for the third utterance: + # sum of the previously processed elements + 5 [ 14, 24, 35, 47] + target_state = states[0] + + # Part 1 + log.info("Infer the first utterance") + for next_input in range(len(input_data)/3): + infer_request.infer({0 : input_data[next_input]}) + state_buf = target_state.state.data + log.info(state_buf[0]) + + # Part 2 + log.info("\nReset state between utterances...\n") + target_state.reset() + + log.info("Infer the second utterance") + + for next_input in range(len(input_data)/3, (len(input_data)/3 * 2)): + infer_request.infer({0 : input_data[next_input]}) + state_buf = target_state.state.data + log.info(state_buf[0]) + + # Part 3 + log.info("\nSet state value between utterances to 5...\n") + data = np.asarray([5], dtype=np.float32) + tensor = ov.Tensor(data, shared_memory=True) + target_state.state = tensor + + log.info("Infer the third utterance") + for next_input in range((len(input_data)/3 * 2), len(input_data)): + infer_request.infer({0 : input_data[next_input]}) + + state_buf = target_state.state.data + log.info(state_buf[0]) + + log.info("Execution successful") + #! [ov:state_api_usage] + return 0 diff --git a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp index 3f3cd2cb713a19..01170795dbea22 100644 --- a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp +++ b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp @@ -14,14 +14,16 @@ using namespace ov; void state_network_example () { - //! [ov:state_network] + //! [ov:stateful_model] // ... auto input = std::make_shared(ov::element::f32, ov::Shape{1, 1}); auto init_const = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0}); - // The ReadValue/Assign operations must be used in pairs in the network. - // For each such a pair, its own variable object must be created. + // Typically ReadValue/Assign operations are presented as pairs in models. 
+ // ReadValue operation reads information from an internal memory buffer, Assign operation writes data to this buffer. + // For each pair, its own Variable object must be created. + // Variable defines name, shape and type of the buffer. const std::string variable_name("variable0"); ov::op::util::VariableInfo var_info = {init_const->get_shape(), init_const->get_element_type(), @@ -37,7 +39,7 @@ void state_network_example () { auto model = std::make_shared(ov::ResultVector({result}), ov::SinkVector({save}), ov::ParameterVector({input})); - //! [ov:state_network] + //! [ov:stateful_model] } void low_latency_2_example() { diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst index c8e041e5a367e9..5d922ef8bdc4e7 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst @@ -12,6 +12,7 @@ OpenVINO™ Low Precision Transformations :caption: Low Precision Transformations :hidden: + Quantization Scheme Attributes Step 1. Prerequisites transformations Step 2. 
Markup transformations diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst new file mode 100644 index 00000000000000..90d757c10668f3 --- /dev/null +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst @@ -0,0 +1,27 @@ +Quantization Scheme +============================== + + +.. meta:: + :description: Learn about quantization scheme. + +.. toctree:: + :maxdepth: 1 + :caption: Low Precision Transformations + +Key steps in the quantization scheme: + +* Low Precision Transformations: ``FakeQuantize`` decomposition to Quantize with a low precision output and Dequantize. For more details, refer to the :doc:`Quantize decomposition <../low-precision-transformations>` section. +* Low Precision Transformations: move Dequantize through operations. For more details, refer to the :doc:`Main transformations <./step3-main>` section. +* Plugin: fuse operations with Quantize and inference in low precision. + +Quantization scheme features: + +* Quantization operation is expressed through the ``FakeQuantize`` operation, which involves more than scale and shift. For more details, see: :doc:`FakeQuantize-1 <../../../../openvino-ir-format/operation-sets/operation-specs/quantization/fake-quantize-1>`. If the ``FakeQuantize`` input and output intervals are the same, ``FakeQuantize`` degenerates to ``Multiply``, ``Subtract`` and ``Convert`` (scale & shift). +* Dequantization operation is expressed through element-wise ``Convert``, ``Subtract`` and ``Multiply`` operations. ``Convert`` and ``Subtract`` are optional. These operations can be handled as typical element-wise operations, for example, fused or transformed to another. 
+* OpenVINO plugins fuse ``Dequantize`` and ``Quantize`` operations after a low precision operation and do not fuse ``Quantize`` before it. + +Here is a quantization scheme example for int8 quantization applied to a part of a model with two ``Convolution`` operations in CPU plugin. + +.. image:: ../../../../../assets/images/quantization_scheme.svg + :alt: Quantization scheme diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 79c3471f3ab783..08efa7406e42b5 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -44,7 +44,7 @@ will not work with these instructions, make sure to import openvino_genai as ov_genai pipe = ov_genai.LLMPipeline(model_path, "CPU") - print(pipe.generate("The Sun is yellow because")) + print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) .. tab-item:: C++ :sync: cpp @@ -57,7 +57,7 @@ will not work with these instructions, make sure to int main(int argc, char* argv[]) { std::string model_path = argv[1]; ov::genai::LLMPipeline pipe(model_path, "CPU"); - std::cout << pipe.generate("The Sun is yellow because"); + std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); } The `LLMPipeline` is the main object used for decoding. You can construct it directly from the @@ -85,7 +85,7 @@ below, where a lambda function outputs words to the console immediately upon gen pipe = ov_genai.LLMPipeline(model_path, "CPU") streamer = lambda x: print(x, end='', flush=True) - pipe.generate("The Sun is yellow because", streamer=streamer) + pipe.generate("The Sun is yellow because", streamer=streamer, max_new_tokens=100) .. tab-item:: C++ @@ -104,7 +104,7 @@ below, where a lambda function outputs words to the console immediately upon gen // false means continue generation. 
return false; }; - pipe.generate("The Sun is yellow because", ov::genai::streamer(streamer)); + pipe.generate("The Sun is yellow because", ov::genai::streamer(streamer), ov::genai::max_new_tokens(100)); } You can also create your custom streamer for more sophisticated processing: @@ -132,7 +132,7 @@ You can also create your custom streamer for more sophisticated processing: # Decode tokens and process them. pipe = ov_genai.LLMPipeline(model_path, "CPU") - pipe.generate("The Sun is yellow because", streamer=CustomStreamer()) + pipe.generate("The Sun is yellow because", streamer=CustomStreamer(), max_new_tokens=100) .. tab-item:: C++ @@ -164,7 +164,7 @@ You can also create your custom streamer for more sophisticated processing: std::string model_path = argv[1]; ov::genai::LLMPipeline pipe(model_path, "CPU"); - pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer)); + pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100)); } Using GenAI in Chat Scenario diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index a393a0925cba3c..da4f34b8806aea 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -52,11 +52,12 @@ Compress Model Weights **8-bit weight quantization** method offers a balance between model size reduction and maintaining accuracy, which usually leads to significant performance improvements for Transformer-based models. Models with 8-bit compressed weights are performant on the -vast majority of supported CPU and GPU platforms. +vast majority of supported CPU and GPU platforms. By default, weights are compressed +asymmetrically to "INT8_ASYM" mode. 
-The code snippet below shows how to do 8-bit quantization of the model weights represented -in OpenVINO IR using NNCF: +The code snippet below shows how to do asymmetrical 8-bit quantization of the model weights +represented in OpenVINO IR using NNCF: .. tab-set:: @@ -72,7 +73,7 @@ Now, the model is ready for compilation and inference. It can be also saved into a compressed format, resulting in a smaller binary file. **4-bit weight quantization** method stands for an INT4-INT8 mixed-precision weight quantization, -where INT4 is considered as the primary precision and INT8 is the backup one. +where INT4 is considered as the primary precision and asymmetric INT8 is the backup one. It usually results in a smaller model size and lower inference latency, although the accuracy degradation could be higher, depending on the model. @@ -100,7 +101,7 @@ memory reduction, speed gain, and accuracy loss. - Memory Reduction - Latency Improvement - Accuracy Loss - * - INT8 + * - INT8 Asymmetric - Low - Medium - Low @@ -122,8 +123,8 @@ trade-offs after optimization: **Symmetric Compression** - ``INT4_SYM`` - INT4 Symmetric mode involves quantizing weights to an unsigned 4-bit integer - symmetrically with a fixed zero point of 8. This mode is faster than the INT8, making + INT4 Symmetric mode involves quantizing weights to a signed 4-bit integer + symmetrically without zero point. This mode is faster than the INT8_ASYM, making it ideal for situations where **speed and size reduction are prioritized over accuracy**. .. code-block:: python @@ -159,15 +160,15 @@ trade-offs after optimization: `Larger Group Size`: Results in faster inference and a smaller model, but might compromise accuracy. -* ``ratio`` controls the ratio between INT4 and INT8 compressed layers in the model. +* ``ratio`` controls the ratio between INT4 and INT8_ASYM compressed layers in the model. Ratio is a decimal between 0 and 1. 
For example, 0.8 means that 80% of layers will be - compressed to INT4, while the rest will be compressed to INT8 precision. The default + compressed to INT4, while the rest will be compressed to INT8_ASYM precision. The default value for ratio is 1. `Higher Ratio (more INT4)`: Reduces the model size and increase inference speed but might lead to higher accuracy degradation. - `Lower Ratio (more INT8)`: Maintains better accuracy but results in a larger model size + `Lower Ratio (more INT8_ASYM)`: Maintains better accuracy but results in a larger model size and potentially slower inference. In this example, 90% of the model's layers are quantized to INT4 asymmetrically with @@ -238,7 +239,7 @@ If the model comes from `Hugging Face `__ and is by Optimum, it may be easier to use the Optimum Intel API to perform weight compression. The compression type is specified when the model is loaded using the ``load_in_8bit=True`` or ``load_in_4bit=True`` parameter. The second example uses the Weight Compression API -from Optimum Intel instead of NNCF to compress the model to INT8. +from Optimum Intel instead of NNCF to compress the model to INT8_ASYM. .. tab-set:: @@ -359,7 +360,7 @@ score indicates a lower accuracy. It is measured on the - 5.01 - 10.3 * - databricks/dolly-v2-3b - - INT8 + - INT8_ASYM - 5.07 - 2.6 * - databricks/dolly-v2-3b @@ -371,7 +372,7 @@ score indicates a lower accuracy. It is measured on the - 4.25 - 24.8 * - facebook/opt-6.7b - - INT8 + - INT8_ASYM - 4.27 - 6.2 * - facebook/opt-6.7b @@ -383,7 +384,7 @@ score indicates a lower accuracy. It is measured on the - 3.28 - 25.1 * - meta-llama/Llama-2-7b-chat-hf - - INT8 + - INT8_ASYM - 3.29 - 6.3 * - meta-llama/Llama-2-7b-chat-hf @@ -395,7 +396,7 @@ score indicates a lower accuracy. It is measured on the - 4.15 - 25.6 * - togethercomputer/RedPajama-INCITE-7B-Instruct - - INT8 + - INT8_ASYM - 4.17 - 6.4 * - togethercomputer/RedPajama-INCITE-7B-Instruct @@ -407,7 +408,7 @@ score indicates a lower accuracy. 
It is measured on the - 2.92 - 48.5 * - meta-llama/Llama-2-13b-chat-hf - - INT8 + - INT8_ASYM - 2.91 - 12.1 * - meta-llama/Llama-2-13b-chat-hf diff --git a/docs/articles_en/openvino-workflow/model-preparation.rst b/docs/articles_en/openvino-workflow/model-preparation.rst index c6c7eaeb17fb31..bea0fcdba5311b 100644 --- a/docs/articles_en/openvino-workflow/model-preparation.rst +++ b/docs/articles_en/openvino-workflow/model-preparation.rst @@ -267,6 +267,7 @@ Before saving the model to OpenVINO IR, consider :doc:`Post-training Optimization ` to achieve more efficient inference and a smaller model. +.. _convert_model_cli_ovc: Convert a Model in CLI: ``ovc`` ############################### diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index 4c262b49f6f704..7ac982e37f6716 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -6,6 +6,13 @@ NPU Device a low-power processing device dedicated to running AI inference. +.. toctree:: + :maxdepth: 1 + :hidden: + + npu-device/remote-tensor-api-npu-plugin + + The Neural Processing Unit is a low-power hardware solution, introduced with the Intel® Core™ Ultra generation of CPUs (formerly known as Meteor Lake). It enables you to offload certain neural network computation tasks from other devices, @@ -164,8 +171,8 @@ offer a limited set of supported OpenVINO features. **ov::intel_npu::compilation_mode_params** -``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows to -control model compilation for NPU. +``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows +control of model compilation for NPU. .. 
note:: @@ -176,7 +183,7 @@ Following configuration options are supported: **optimization-level** -Defines a preset of optimization passes to be applied during compilation. +Defines an optimization effort hint to the compiler. .. list-table:: :widths: 10 200 @@ -185,7 +192,7 @@ Defines a preset of optimization passes to be applied during compilation. * - **Value** - **Description** * - 0 - - Reduced subset of optimization passes. Smaller compile time. + - Reduced subset of optimization passes. May result in shorter compile time. * - 1 - **Default.** Balanced performance/compile time. * - 2 @@ -193,7 +200,7 @@ Defines a preset of optimization passes to be applied during compilation. **performance-hint-override** -An extension for LATENCY mode being specified using ``ov::hint::performance_mode`` +The LATENCY mode can be overridden by specifying ``ov::hint::performance_mode``. Has no effect for other ``ov::hint::PerformanceMode`` hints. .. list-table:: @@ -207,15 +214,31 @@ Has no effect for other ``ov::hint::PerformanceMode`` hints. * - latency - Prioritize performance over power efficiency. -.. tab-set:: +Usage example: - .. tab-item:: Usage example +.. code-block:: - .. code-block:: + map config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")}; + + compile_model(model, config); + +**npu_turbo** + +The turbo mode, where available, provides a hint to the system to maintain the +maximum NPU frequency and memory throughput within the platform TDP limits. +The turbo mode is not recommended for sustained workloads due to higher power +consumption and potential impact on other compute resources. + +.. code-block:: + + core.set_property("NPU", ov::intel_npu::turbo(true)); + +or + +.. 
code-block:: - map config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")}; + core.compile_model(ov_model, "NPU", {ov::intel_npu::turbo(true)}); - compile_model(model, config); Limitations ############################# diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst new file mode 100644 index 00000000000000..2e41f4f5616ff2 --- /dev/null +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst @@ -0,0 +1,137 @@ +.. {#openvino_docs_OV_UG_supported_plugins_NPU_RemoteTensor_API} + +Remote Tensor API of NPU Plugin +=============================== + + +.. meta:: + :description: The Remote Tensor API of NPU plugin in OpenVINO™ supports + interoperability with existing native APIs, such as + NT handle, or DMA-BUF System Heap. + + +The NPU plugin implementation of the ``ov::RemoteContext`` and ``ov::RemoteTensor`` interface assists NPU +pipeline developers who need memory sharing with existing native APIs (for example, OpenCL, Vulkan, DirectX 12) +by exporting an NT handle on Windows, or DMA-BUF System Heap on Linux and passing that pointer as the +``shared_buffer`` member to the ``remote_tensor(..., shared_buffer)`` create function. They allow you +to avoid any memory copy overhead when plugging OpenVINO™ inference into an existing NPU pipeline. + +Supported scenario by the Remote Tensor API: + +* The NPU plugin context and memory objects can be constructed from low-level device, display, or memory handles and used to create the OpenVINO™ ``ov::CompiledModel`` or ``ov::Tensor`` objects. 
+ +Class and function declarations for the API are defined in the following file: ``src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp`` + +The most common way to enable the interaction of your application with the Remote Tensor API is to use user-side utility classes +and functions that consume or produce native handles directly. + +Context Sharing Between Application and NPU Plugin +################################################## + +NPU plugin classes that implement the ``ov::RemoteContext`` interface are responsible for context sharing. +Obtaining a context object is the first step in sharing pipeline objects. +The context object of the NPU plugin directly wraps Level Zero context, setting a scope for sharing the +``ov::RemoteTensor`` objects. The ``ov::RemoteContext`` object is retrieved from the NPU plugin. + +Once you have obtained the context, you can use it to create the ``ov::RemoteTensor`` objects. + +Getting RemoteContext from the Plugin ++++++++++++++++++++++++++++++++++++++ + +To request the current default context of the plugin, use one of the following methods: + +.. tab-set:: + + .. tab-item:: Get context from Core + :sync: get-context-core + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_core] + + .. tab-item:: Get context from compiled model + :sync: get-context-compiled-model + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_model] + +Memory Sharing Between Application and NPU Plugin +################################################# + +The classes that implement the ``ov::RemoteTensor`` interface are the wrappers for native API +memory handles, which can be obtained from them at any time. 
+ +To create a shared tensor from a native memory handle, use the dedicated ``create_tensor``, ``create_l0_host_tensor``, or ``create_host_tensor`` +methods of the ``ov::RemoteContext`` sub-classes. +``ov::intel_npu::level_zero::LevelZero`` has multiple overloaded methods which enable wrapping pre-allocated native handles with the ``ov::RemoteTensor`` +object or requesting the plugin to allocate specific device memory. +For more details, see the code snippets below: + + +.. tab-set:: + + .. tab-item:: Wrap native handle + :sync: wrap-native-handles + + .. tab-set:: + + .. tab-item:: NT handle + :sync: nthandle + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [wrap_nt_handle] + + .. tab-item:: DMA-BUF System Heap file descriptor + :sync: dma-buf + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [wrap_dmabuf_fd] + + .. tab-item:: Allocate device memory + :sync: allocate-device-memory + + .. tab-set:: + + .. tab-item:: Remote Tensor - Level Zero host memory + :sync: remote-level-zero-host-memory + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [allocate_remote_level_zero_host] + + .. tab-item:: Tensor - Level Zero host memory + :sync: level-zero-host-memory + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [allocate_level_zero_host] + + +Limitations ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +* Allocation of the NT handle or DMA-BUF System Heap file descriptor is done manually. + +Low-Level Methods for RemoteContext and RemoteTensor Creation +############################################################# + +The high-level wrappers mentioned above bring a direct dependency on native APIs to your program. 
+If you want to avoid the dependency, you still can directly use the ``ov::Core::create_context()``, +``ov::RemoteContext::create_tensor()``, and ``ov::RemoteContext::get_params()`` methods. +On this level, native handles are re-interpreted as void pointers and all arguments are passed +using ``ov::AnyMap`` containers that are filled with the ``std::string, ov::Any`` pairs. +Two types of map entries are possible: a descriptor and a container. +The descriptor sets the expected structure and possible parameter values of the map. + +For possible low-level properties and their description, refer to the header file: +`remote_properties.hpp `__. + +Additional Resources +#################### + +* `ov::Core `__ +* `ov::RemoteTensor `__ + diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst index 7d19e17a70f2c6..3fa01212b6d86b 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst @@ -10,7 +10,6 @@ Optimize Preprocessing optimize-preprocessing/preprocessing-api-details optimize-preprocessing/layout-api-overview - optimize-preprocessing/integrate-save-preprocessing-use-case Torchvision preprocessing converter .. 
meta:: diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst index cb03e3b4e8129f..ef8613b84f0626 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst @@ -3,6 +3,11 @@ Preprocessing API - details =========================== +.. toctree:: + :maxdepth: 1 + :hidden: + + preprocessing-api-details/integrate-save-preprocessing-use-case .. meta:: :description: Learn the details on capabilities of pre-processing API and post-processing. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/integrate-save-preprocessing-use-case.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details/integrate-save-preprocessing-use-case.rst similarity index 60% rename from docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/integrate-save-preprocessing-use-case.rst rename to docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details/integrate-save-preprocessing-use-case.rst index aeb59c2e37a08e..2563b9270082b0 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/integrate-save-preprocessing-use-case.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details/integrate-save-preprocessing-use-case.rst @@ -10,8 +10,8 @@ Use Case - Integrate and Save Preprocessing Steps Into IR OpenVINO Intermediate Representation. 
-Previous sections covered the topic of the :doc:`preprocessing steps ` -and the overview of :doc:`Layout ` API. +Previous sections covered the :doc:`preprocessing steps <../preprocessing-api-details>` +and the overview of :doc:`Layout API <../layout-api-overview>`. For many applications, it is also important to minimize read/load time of a model. Therefore, performing integration of preprocessing steps every time on application @@ -20,25 +20,18 @@ once pre and postprocessing steps have been added, it can be useful to store new model to OpenVINO Intermediate Representation (OpenVINO IR, `.xml` format). Most available preprocessing steps can also be performed via command-line options, -using Model Optimizer. For details on such command-line options, refer to the -:doc:`Optimizing Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>`. +using ``ovc``. For details on such command-line options, refer to the +:ref:`Model Conversion `. Code example - Saving Model with Preprocessing to OpenVINO IR ############################################################# -When some preprocessing steps cannot be integrated into the execution graph using -Model Optimizer command-line options (for example, ``YUV``->``RGB`` color space conversion, -``Resize``, etc.), it is possible to write a simple code which: +In the following example: -* Reads the original model (OpenVINO IR, TensorFlow, TensorFlow Lite, ONNX, PaddlePaddle). -* Adds the preprocessing/postprocessing steps. -* Saves resulting model as IR (``.xml`` and ``.bin``). +* Original ONNX model takes one ``float32`` input with the ``{1, 3, 224, 224}`` shape, the ``RGB`` channel order, and mean/scale values applied. +* Application provides ``BGR`` image buffer with a non-fixed size and input images as batches of two. 
-Consider the example, where an original ONNX model takes one ``float32`` input with the -``{1, 3, 224, 224}`` shape, the ``RGB`` channel order, and mean/scale values applied. -In contrast, the application provides ``BGR`` image buffer with a non-fixed size and -input images as batches of two. Below is the model conversion code that can be applied -in the model preparation script for such a case. +Below is the model conversion code that can be applied in the model preparation script for this case: * Includes / Imports @@ -62,7 +55,6 @@ in the model preparation script for such a case. * Preprocessing & Saving to the OpenVINO IR code. - .. tab-set:: .. tab-item:: Python @@ -83,8 +75,8 @@ in the model preparation script for such a case. Application Code - Load Model to Target Device ############################################## -After this, the application code can load a saved file and stop preprocessing. In this case, enable -:doc:`model caching <../optimizing-latency/model-caching-overview>` to minimize load +Next, the application code can load a saved file and stop preprocessing. In this case, enable +:doc:`model caching <../../optimizing-latency/model-caching-overview>` to minimize load time when the cached model is available. @@ -108,10 +100,10 @@ time when the cached model is available. 
Additional Resources #################### -* :doc:`Preprocessing Details ` -* :doc:`Layout API overview ` -* :doc:`Model Optimizer - Optimize Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` -* :doc:`Model Caching Overview <../optimizing-latency/model-caching-overview>` +* :doc:`Preprocessing Details <../preprocessing-api-details>` +* :doc:`Layout API overview <../layout-api-overview>` +* :doc:`Model Caching Overview <../../optimizing-latency/model-caching-overview>` +* :doc:`Model Preparation <../../../../model-preparation>` * The `ov::preprocess::PrePostProcessor `__ C++ class documentation * The `ov::pass::Serialize `__ - pass to serialize model to XML/BIN * The ``ov::set_batch`` - update batch dimension for a given model diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst index 38af00d3796d5d..09701ab97d23fd 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst @@ -61,7 +61,8 @@ To enable model caching, the application must specify a folder to store the cach With this code, if the device specified by ``device_name`` supports import/export model capability, -a cached blob is automatically created inside the ``/path/to/cache/dir`` folder. +a cached blob (the ``.cl_cache`` and ``.blob`` files for GPU and CPU, respectively) is automatically +created inside the ``/path/to/cache/dir`` folder. If the device does not support the import/export capability, cache is not created and no error is thrown. 
Note that the first ``compile_model`` operation takes slightly longer, as the cache needs to be created - diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst index a350d1bcbb5a77..a7db3317203045 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst @@ -60,12 +60,20 @@ Parameter/Result tensor names. If there are no tensor names, .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:make_stateful_tensor_names] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:make_stateful_tensor_names] .. tab-item:: Using Parameter/Result operations .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:make_stateful_ov_nodes] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:make_stateful_ov_nodes] .. tab-item:: command line @@ -114,6 +122,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:get_ov_model] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:get_ov_model] 2. Change the number of iterations inside TensorIterator/Loop nodes in the model using the :doc:`Reshape <../changing-input-shape>` feature. @@ -129,6 +141,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:reshape_ov_model] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:reshape_ov_model] **Unrolling**: If the LowLatency2 transformation is applied to a model containing TensorIterator/Loop nodes with exactly one iteration inside, these nodes are unrolled. @@ -143,6 +159,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:apply_low_latency_2] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:apply_low_latency_2] (Optional) Use Const Initializer argument: @@ -159,6 +179,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:low_latency_2_use_parameters] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:low_latency_2_use_parameters] .. image:: ../../../assets/images/llt2_use_const_initializer.svg @@ -178,6 +202,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:low_latency_2] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:low_latency_2] 4. Use state API. See sections :doc:`OpenVINO State API <../stateful-models>`, @@ -208,6 +236,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:replace_const] + + .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:replace_const] Stateful Model from Scratch ################################## @@ -228,7 +260,11 @@ a sink from `ov::Model` after deleting the node from the graph with the `delete_ .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp - :fragment: [ov:state_network] + :fragment: [ov:stateful_model] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:stateful_model] .. note:: diff --git a/samples/js/node/README.md b/samples/js/node/README.md index 59fb381f460abc..7375219ccf2c0a 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -26,6 +26,7 @@ VSCode extension to run these notebook samples - hello-detection.nnb - question-answering.nnb - pose-estimation.nnb + - optical-character-recognition.nnb ## Live Sample diff --git a/samples/js/node/notebooks/optical-character-recognition.nnb b/samples/js/node/notebooks/optical-character-recognition.nnb new file mode 100644 index 00000000000000..b7e8e109ff857f --- /dev/null +++ b/samples/js/node/notebooks/optical-character-recognition.nnb @@ -0,0 +1,314 @@ +{ + "cells": [ + { + "language": "markdown", + "source": [ + "# Optical Character Recognition with OpenVINO™" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "#### This tutorial demonstrates how to perform optical character recognition (OCR) with OpenVINO models" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Imports" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "const fs = require(\"node:fs\");\nconst path = require(\"node:path\");\nconst { createCanvas, Image, ImageData } = require(\"canvas\");\nconst { addon: ov } = require(\"openvino-node\");\nconst { display } = require(\"node-kernel\");\nconst { cv } = require(\"opencv-wasm\");\nconst {\n transform,\n 
getImageData,\n displayArrayAsImage,\n downloadFile,\n arrayToImageData,\n getImageBuffer,\n argMax,\n setShape,\n} = require(\"../helpers.js\");\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Download Models" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Initializing Images, Models\nconst baseArtifactsDir = '../../assets/models';\nconst detBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/';\nconst recBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/text-recognition-resnet-fc/';\nconst detectionModelName = 'horizontal-text-detection-0001';\nconst textRecModelName = 'text-recognition-resnet-fc';\n\nconst detModelXMLName = `${detectionModelName}.xml`;\nconst detModelBINName = `${detectionModelName}.bin`;\n\nconst detModelXMLPath = `${baseArtifactsDir}/${detModelXMLName}`;\nconst detModelBINPath = `${baseArtifactsDir}/${detModelBINName}`;\n\nconst recModelXMLName = `${textRecModelName}.xml`;\nconst recModelBINName = `${textRecModelName}.bin`;\n\nconst recModelXMLPath = `${baseArtifactsDir}/${textRecModelName}.xml`;\nconst recModelBINPath = `${baseArtifactsDir}/${textRecModelName}.bin`;\n\nawait downloadFile(\n detBaseURL + detModelXMLName,\n detModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n detBaseURL + detModelBINName,\n detModelBINName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelXMLName,\n recModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelBINName,\n recModelBINName,\n baseArtifactsDir\n);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.xml'", + "File successfully stored at 
'/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.bin'", + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.xml'", + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.bin'", + "" + ] + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "# Download Image" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "const baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\nconst imgName = 'intel_rnb.jpg';\nawait downloadFile(imgUrl, imgName, baseImagesDir);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/images/intel_rnb.jpg'", + "" + ] + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "# Load a Detection Model" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Initialize OpenVINO core and load the detection model\nconst core = new ov.Core();\nconst detModel = await core.readModel(detModelXMLPath);\nconst detCompiledModel = await core.compileModel(detModel, 'AUTO');\nconst detInputLayer = detCompiledModel.input(0);\nconst detOutputLayer = detCompiledModel.output('boxes');\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Prepare Image for Inference" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "const imageData = await getImageData(`${baseImagesDir}/intel_rnb.jpg`);\nconst inputImageMat = cv.matFromImageData(imageData);\nconst displayImageMat = inputImageMat.clone();\n\n// Resize the image to meet network input size\nconst [B, C, H, W] = detInputLayer.shape;\nconst resizedImage = new 
cv.Mat();\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB);\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB);\ncv.resize(inputImageMat, resizedImage, new cv.Size(W, H));\n\n// Prepare input tensor\nconst inputImage = transform(resizedImage.data,\n { width: W, height: H },\n [0, 1, 2]);\nconst tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(\n ov.element.f32,\n detInputLayer.shape,\n tensorData\n);\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "## Define Post-Processing Functions" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n console.log(`Output shape: ${output.getData()}`);\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Do Inference" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Create infer request\nconst detInferRequest = detCompiledModel.createInferRequest();\n\nconst detResult = await detInferRequest.inferAsync([tensor]);\nconst boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]);\n\n// Show original image\ndisplayArrayAsImage(\n displayImageMat.data,\n displayImageMat.cols,\n displayImageMat.rows,\n display\n);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "# Load Text Recognition Model" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Loading the text recognition model\nconst recModel = await core.readModel(recModelXMLPath);\nconst recModelCompiled = await core.compileModel(recModel, 'AUTO');\nconst recInputLayer = recModelCompiled.input(0);\nconst 
recOutputLayer = recModelCompiled.output(0);\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Define Post-Processing Functions" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Function to calculate the ratios for the image\nfunction calculateRatios(originalImage, resizedImage) {\n const realY = originalImage.rows;\n const realX = originalImage.cols;\n const resizedY = resizedImage.rows;\n const resizedX = resizedImage.cols;\n const ratioX = realX / resizedX;\n const ratioY = realY / resizedY;\n\n return { ratioX, ratioY };\n}\n\n// Function to convert the image to grayscale\nfunction convertToGrayscale(originalImage) {\n const grayscaleImage = new cv.Mat();\n cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY);\n\n return grayscaleImage;\n}\n\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => idx % 2\n ? Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return box.map(scaleShape);\n}\n\n\n// Function to resize and convert a crop to the recognition model input format\nfunction resizeAndConvertCropToModelInput(crop, netShape) {\n const [netWidth, netHeight] = netShape;\n\n // Resize the crop to the network's input shape\n const tempImg = new cv.Mat();\n cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight));\n\n // Create the reshaped buffer\n const reshapedBuffer = new Uint8Array(netHeight * netWidth);\n let index = 0;\n\n for (let i = 0; i < netHeight; i++) {\n for (let j = 0; j < netWidth; j++) {\n reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0];\n }\n }\n\n // Clean up\n tempImg.delete();\n\n return reshapedBuffer;\n}\n\n// Function to extract recognition results from the model output\nfunction extractRecognitionResults(output) {\n const outputData = output.getData();\n const outputShape = output.getShape();\n const [batchSize, height, width] = outputShape;\n\n return 
setShape(outputData, [height, width]);\n}\n\n// Function to parse annotations from the recognition results\nfunction parseAnnotations(recognitionResults) {\n const letters = \"~0123456789abcdefghijklmnopqrstuvwxyz\";\n const annotation = [];\n\n for (const row of recognitionResults) {\n const letterIndex = argMax(row);\n const parsedLetter = letters[letterIndex];\n\n // Stop if end character is encountered\n if (parsedLetter === letters[0]) break;\n annotation.push(parsedLetter);\n }\n\n return annotation.join('');\n}\n\n// Function to crop the image based on the bounding box coordinates\nfunction cropImage(originalImage, xMin, yMin, xMax, yMax) {\n xMin = Math.max(0, xMin);\n yMin = Math.max(0, yMin);\n xMax = Math.min(originalImage.cols, xMax);\n yMax = Math.min(originalImage.rows, yMax);\n if (xMin >= xMax || yMin >= yMax) {\n throw new Error('Invalid crop coordinates');\n }\n const roi = originalImage.roi(\n new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin)\n );\n const cropped = new cv.Mat();\n roi.copyTo(cropped);\n roi.delete();\n\n return cropped;\n}\n\n// Function to log the bounding boxes with annotations\nfunction printSortedAnnotations(boxesWithAnnotations) {\n /* Sort the boxes with annotations based\n on their position in the input image */\n const sortedAnnotations = boxesWithAnnotations\n .sort((a, b) => {\n const [aXMin, aYMin] = a.box;\n const [bXMin, bYMin] = b.box;\n\n return (aYMin - bYMin) || (aXMin - bXMin);\n })\n .map(item => item.annotation);\n\n console.log('Sorted Annotations:', sortedAnnotations);\n}\n\n// Get Text size\nfunction getTextSize(text, fontFace, fontScale) {\n const canvas = createCanvas(200, 200);\n const ctx = canvas.getContext('2d');\n const adjustedFontScale = fontScale * 35;\n ctx.font = `${adjustedFontScale}px ${fontFace}`;\n const metrics = ctx.measureText(text);\n const width = metrics.width;\n const height =\n metrics.actualBoundingBoxAscent +\n metrics.actualBoundingBoxDescent;\n\n return { width, height 
};\n}\n\n/* The convertResultToImage function visualizes object detection\n results on an image by drawing bounding boxes around detected\n objects and optionally adding labels to them. */\nfunction convertResultToImage(\n bgrImage,\n resizedImage,\n boxesWithAnnotations,\n options,\n) {\n const defaultOptions = { threshold: 0.3, confLabels: true };\n const { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n const colors = {\n red: [255, 0, 0, 255],\n green: [0, 255, 0, 255],\n white: [255, 255, 255, 255]\n };\n const [realY, realX] = [bgrImage.rows, bgrImage.cols];\n const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols];\n const [ratioX, ratioY] = [realX / resizedX, realY / resizedY];\n\n const rgbImage = new cv.Mat();\n cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB);\n\n boxesWithAnnotations.forEach(({ box, annotation }) => {\n const conf = box[box.length - 1];\n\n if (conf < threshold) return;\n\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box);\n\n cv.rectangle(\n rgbImage,\n new cv.Point(xMin, yMin),\n new cv.Point(xMax, yMax),\n colors.green,\n 3\n );\n\n if (!confLabels) return;\n\n const text = `${annotation}`;\n const fontScale = 0.8;\n const thickness = 1;\n const { width: textW, height: textH } = getTextSize(text, 'Arial', fontScale);\n const imageCopy = rgbImage.clone();\n\n cv.rectangle(\n imageCopy,\n new cv.Point(xMin, yMin - textH - 10),\n new cv.Point(xMin + textW, yMin - 10),\n colors.white,\n cv.FILLED\n );\n cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, rgbImage);\n cv.putText(\n rgbImage,\n text,\n new cv.Point(xMin, yMin - 10),\n cv.FONT_HERSHEY_SIMPLEX,\n fontScale,\n colors.red,\n thickness,\n cv.LINE_AA\n );\n\n imageCopy.delete();\n\n });\n\n return rgbImage;\n}\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Async Inference Helper Function" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "async function 
inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations,\n) {\n // Create infer request\n const inferRequest = recModelCompiled.createInferRequest();\n\n // Define the completion callback function\n function completionCallback(outputTensor, i, annotations) {\n const recognitionResults = extractRecognitionResults(outputTensor);\n const annotation = parseAnnotations(recognitionResults);\n annotations.push(annotation);\n }\n\n // Start inference in asynchronous mode\n try {\n const result = await inferRequest.inferAsync([tensor]);\n completionCallback(result[recOutputLayer], i, annotations);\n }catch (error) {\n console.error('Error during inference:', error);\n }\n}\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "### Do Inference and Show Detected Text Boxes and OCR Results for the Image\n" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Process each bounding box and run inference on the recognition model\nconst [batchSize, channels, height, width] = recInputLayer.shape;\n// Calculate ratios\nconst {\n ratioX,\n ratioY,\n} = calculateRatios(inputImageMat, resizedImage);\n\n// Convert image to grayscale\nconst grayscaleImage = convertToGrayscale(inputImageMat);\n\nconst annotations = [];\nconst croppedImages = [];\n\n\nfor (let i = 0; i < boundingBoxesArray.length; i++) {\n const crop = boundingBoxesArray[i];\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map(Math.floor);\n const cropRect = new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin);\n const croppedImage = grayscaleImage.roi(cropRect);\n\n try {\n const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [width, height]);\n const tensorData = new Float32Array(preprocessedCrop);\n const tensor = new ov.Tensor(\n ov.element.f32,\n Int32Array.from(recInputLayer.shape),\n tensorData\n );\n\n await inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations\n 
);\n\n croppedImages.push(\n cropImage(inputImageMat, xMin, yMin, xMax, yMax)\n );\n } catch (error) {\n console.error('Error during preprocessing:', error);\n }\n\n croppedImage.delete();\n}\n\ngrayscaleImage.delete();\n\nconst boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({\n box,\n annotation: annotations[index]\n}));\n\nconst resultImage = convertResultToImage(\n inputImageMat,\n resizedImage,\n boxesWithAnnotations,\n { threshold: 0.3, confLabels: true }\n);\n\ndisplayArrayAsImage(\n resultImage.data,\n resultImage.cols,\n resultImage.rows,\n display\n);\n\ncroppedImages.forEach((croppedImage) => {\n displayArrayAsImage(\n croppedImage.data,\n croppedImage.cols,\n croppedImage.rows,\n display\n );\n});\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Annotation for box 0: building", + "Cropped Image Size: 159 x 40", + "Annotation for box 1: noyce", + "Original Image Size: 690 x 517", + "Cropping Coordinates: (256, 50) to (377, 88)", + "Cropped Image Size: 121 x 38", + "Cropping Coordinates: (604, 205) to (653, 228)", + "Cropped Image Size: 49 x 23", + "Cropped Image Size: 26 x 32", + "Cropped Image Size: 31 x 23", + "Text: noyce, Width: 74.716796875, Height: 21", + "Text: 2200, Width: 62.2890625, Height: 19", + "Text: robert, Width: 73.14453125, Height: 20", + "" + ] + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "### Print Annotations in Plain Text Format" + ], + 
"outputs": [] + }, + { + "language": "typescript", + "source": [ + "printSortedAnnotations(boxesWithAnnotations);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Sorted Annotations: [ 'robert', 'n', 'noyce', 'building', '2200', 'center' ]", + "" + ] + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 3e8ceebcaa0e49..40aab57b2c7e6b 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -126,12 +126,7 @@ TEST_P(ov_core_test, ov_core_compile_model) { ov_core_free(core); } -#ifdef OPENVINO_ARCH_ARM64 -// Ticket: 126283 -TEST_P(ov_core_test, DISABLED_ov_core_compile_model_with_property) { -#else TEST_P(ov_core_test, ov_core_compile_model_with_property) { -#endif auto device_name = GetParam(); ov_core_t* core = nullptr; OV_EXPECT_OK(ov_core_create(&core)); @@ -149,12 +144,7 @@ TEST_P(ov_core_test, ov_core_compile_model_with_property) { char* property_value = nullptr; OV_EXPECT_OK(ov_compiled_model_get_property(compiled_model, key, &property_value)); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) - // TODO: fix once ARM plugin supports multi-stream - EXPECT_STREQ(property_value, "1"); -#else EXPECT_STREQ(property_value, "2"); -#endif ov_free(property_value); ov_compiled_model_free(compiled_model); diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 6127d46c62a103..49ebd8d4f87716 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -2,7 +2,7 @@ numpy>=1.16.6,<2.1.0 # Python bindings, frontends # pytest -pytest>=5.0,<8.3 +pytest>=5.0,<8.4 pytest-dependency==0.6.0 pytest-html==4.1.1 pytest-timeout==2.2.0 @@ -10,7 +10,7 @@ pytest-timeout==2.2.0 # Python bindings py>=1.9.0 pygments>=2.8.1 -setuptools>=65.6.1 +setuptools>=65.6.1,<72 sympy>=1.10 wheel>=0.38.1 patchelf<=0.17.2.1 @@ 
-19,7 +19,7 @@ patchelf<=0.17.2.1 h5py>=3.1.0,<3.12.0 docopt~=0.6.2 paddlepaddle==2.6.0 -tensorflow>=1.15.5,<2.17.0 +tensorflow>=1.15.5,<2.18.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 onnx==1.15.0 diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index a1293f89a1ffc5..d9dae251aa64e7 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -16,6 +16,11 @@ logger.setLevel(logging.WARNING) +class InlinedInput: + def __init__(self, data) -> None: + self.data = data + + class TorchFXPythonDecoder (Decoder): def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]): @@ -59,7 +64,7 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i for arg in uargs if arg[1] is not None] for idx, shape in enumerate(found_shapes): if shape is not None: - new_shape=[] + new_shape = [] for dim in range(0, len(shape)): if (type(shape[dim]).__name__ == "SymInt"): new_shape.append(-1) @@ -81,7 +86,7 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i # None in inputs mean the input is inlined or None (also considered inlined) self._inputs = [self._nodes.index( - arg) if arg in self._nodes else (arg,) for arg in pt_module.args] + arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args] # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments. 
new_inputs = [] @@ -92,22 +97,22 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i if arg in self._nodes: new_inputs.append(self._nodes.index(arg)) else: - new_inputs.append((arg,)) + new_inputs.append(InlinedInput(arg)) self.input_types.append(OVAny(DecoderType.List( TorchFXPythonDecoder.get_type_for_value(arg)))) else: v = self._inputs[i] new_inputs.append(v) self.input_types.append( - TorchFXPythonDecoder.get_type_for_value(v[0] if isinstance(v, tuple) else self._nodes[v])) + TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v])) self._inputs = new_inputs def inputs(self): # Consider 0 a special case which may mean the input is inlined, but not guaranteed - return [x if not isinstance(x, tuple) else 0 for x in self._inputs] + return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs] def is_input_inlined(self, index): - return isinstance(self._inputs[index], tuple) + return isinstance(self._inputs[index], InlinedInput) @staticmethod def unpack_containers(arg): @@ -142,19 +147,24 @@ def arg_to_constant(arg): return make_constant(OVType.i64, Shape([]), [arg]) elif isinstance(arg, float): return make_constant(OVType.f32, Shape([]), [arg]) + elif isinstance(arg, str): + u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8) + return torch_tensor_to_ov_const(u8_tensor, shared_memory=True) return None def inlined_input(self, index): assert index < len(self._inputs), "Requested input doesn't exist" assert isinstance( - self._inputs[index], tuple), "Requested input which is not inlined" - assert self._inputs[index][0] is not None, "Requested None inlined input" + self._inputs[index], InlinedInput), "Requested input which is not inlined" + arg = self._inputs[index].data + assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}" constant = None - arg = self._inputs[index][0] constant = self.arg_to_constant(arg) - assert constant is not None, 
f"Constant wasn't created for inlined input {index}" - return constant.outputs() + if constant is not None: + return constant.outputs() + else: + return [] def input(self, index): # TODO: remove return self.inputs()[index] # TODO: find specialized method @@ -257,9 +267,7 @@ def get_named_input(self, name): raise RuntimeError("This input is not a Node") def get_subgraph_size(self): - if issubclass(type(self.pt_module), torch.fx.Node): - return 0 - return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1 + return len(self.get_subgraphs()) def decoder_type_name(self) -> str: return "fx" @@ -277,9 +285,7 @@ def visit_subgraph(self, node_visitor): node_visitor(decoder) def get_subgraphs(self): - if issubclass(type(self.pt_module), torch.fx.Node): - return [] - return list(self.pt_module.blocks()) + return [] def get_subgraph_decoder(self, index): decoder = TorchFXPythonDecoder(self.get_subgraphs()[index], @@ -309,7 +315,7 @@ def _raw_output(self, index): return self._raw_outputs()[index] def _raw_inputs(self): - return [self._nodes[x] if not isinstance(x, tuple) and x < len(self._nodes) else x[0] for x in self._inputs] + return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs] def _raw_input(self, index): return self._raw_inputs()[index] @@ -347,7 +353,7 @@ def as_string(self): return None def input_is_none(self, index): - if index >= len(self._inputs) or (isinstance(self._inputs[index], tuple) and self._inputs[index][0] is None): + if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None): return True else: r_input = self._raw_input(index) diff --git a/src/bindings/python/wheel/setup.py b/src/bindings/python/wheel/setup.py index 610c4e744e32e3..095b9579f4b354 100644 --- a/src/bindings/python/wheel/setup.py +++ b/src/bindings/python/wheel/setup.py @@ -193,6 +193,13 @@ "install_dir": "runtime", "binary_dir": OPENVINO_BINARY_DIR, 
"source_dir": OPENVINO_SOURCE_DIR + }, + "tbb_dev": { + "name": "tbb_dev", + "prefix": f"{BUILD_BASE}/libs.tbb.dev", + "install_dir": "runtime/3rdparty/tbb", + "binary_dir": OPENVINO_BINARY_DIR, + "source_dir": OPENVINO_SOURCE_DIR } } @@ -266,7 +273,7 @@ def finalize_options(self): self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs) if self.cmake_args is None: - self.cmake_args = "" + self.cmake_args = os.getenv("CMAKE_ARGS", "") def cmake_build_and_install(self, install_cfg): """Runs cmake (configure, build and install) if artfiacts are not already built / installed.""" @@ -297,6 +304,7 @@ def cmake_build_and_install(self, install_cfg): f"-DPython3_EXECUTABLE={sys.executable}", f"-DCMAKE_BUILD_TYPE={CONFIG}", f"-DCPACK_GENERATOR={CPACK_GENERATOR}", + "-DENABLE_PYTHON=ON", "-DENABLE_WHEEL=OFF", self.cmake_args, "-S", source_dir, @@ -469,6 +477,8 @@ def copy_package_data(self, src_dirs): os.makedirs(package_dir, exist_ok=True) package_clibs_dir = os.path.join(PACKAGE_DIR, WHEEL_LIBS_INSTALL_DIR) os.makedirs(package_clibs_dir, exist_ok=True) + package_cmake_dir = os.path.join(package_dir, "cmake") + os.makedirs(package_cmake_dir, exist_ok=True) replacements = { # change the path where the libraries are installed (runtime/lib/intel64/Release -> openvino/libs) @@ -490,15 +500,20 @@ def copy_package_data(self, src_dirs): move(file_path, dst_file) self.announce(f"Move {file_path} to {dst_file}", level=3) + # collect all cmake files in one directory + for file_path in Path(src).rglob("*.cmake"): + file_name = os.path.basename(file_path) + if file_path.is_file(): + dst_file = os.path.join(package_cmake_dir, file_name) + self.announce(f"Move {file_path} to {dst_file}", level=3) + move(file_path, dst_file) + self.announce("Patch cmake configurations", level=3) + replace_strings_in_file(dst_file, replacements) + if os.path.isdir(src) and os.listdir(src): # copy the rest of the files to the package directly shutil.copytree(src, dst, 
dirs_exist_ok=True) - # patch cmake configurations - for file_path in Path(dst).rglob("*.cmake"): - if file_path.is_file(): - replace_strings_in_file(file_path, replacements) - def copy_file(src, dst, verbose=False, dry_run=False): """Custom file copy.""" diff --git a/src/common/low_precision_transformations/include/low_precision/broadcast.hpp b/src/common/low_precision_transformations/include/low_precision/broadcast.hpp new file mode 100644 index 00000000000000..39ba4052535c29 --- /dev/null +++ b/src/common/low_precision_transformations/include/low_precision/broadcast.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "transparent_base_transformation.hpp" + +namespace ov { +namespace pass { +namespace low_precision { + +/** + * @ingroup ov_transformation_common_api + * @brief BroadcastTransformation propagates dequantization operations through Broadcast operation. + * + * For more details about the transformation, refer to + * [BroadcastTransformation](@ref openvino_docs_OV_UG_lpt_BroadcastTransformation) page + * in the OpenVINO Developer Guide. 
+ */ +class LP_TRANSFORMATIONS_API BroadcastTransformation : public TransparentBaseTransformation { +public: + OPENVINO_RTTI("BroadcastTransformation", "0"); + BroadcastTransformation(const Params& params = Params()); + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ov diff --git a/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp b/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp index 8a305db307c612..22aaf3281c2b94 100644 --- a/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp +++ b/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -23,6 +23,9 @@ class LP_TRANSFORMATIONS_API RecurrentCellTransformation : public LayerTransform static std::shared_ptr wrap_fake_quantize(const std::shared_ptr parameter); static std::shared_ptr wrap_quantization(const std::shared_ptr parameter); static std::shared_ptr wrap_dequantization(const std::shared_ptr parameter, const bool with_subtract); + +private: + void propagate(TransformationContext& context, const std::shared_ptr node); }; } // namespace low_precision diff --git a/src/common/low_precision_transformations/src/broadcast.cpp b/src/common/low_precision_transformations/src/broadcast.cpp new file mode 100644 index 00000000000000..5e78ca0ef50996 --- /dev/null +++ b/src/common/low_precision_transformations/src/broadcast.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/broadcast.hpp" + +#include + +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset3.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include 
"openvino/pass/pattern/op/wrap_type.hpp" +#include "low_precision/network_helper.hpp" + +#include "itt.hpp" + +using namespace ov::pass::low_precision; + +BroadcastTransformation::BroadcastTransformation(const Params& params) : TransparentBaseTransformation(params) { + MATCHER_SCOPE(BroadcastTransformation); + auto broadcast1 = pattern::wrap_type({ + pattern::wrap_type(), + ov::pass::pattern::any_input(), + ov::pass::pattern::any_input() }); + + auto broadcast3 = pattern::wrap_type({ + pattern::wrap_type(), + ov::pass::pattern::any_input(), + ov::pass::pattern::any_input() }); + + const auto matcher = std::make_shared(ov::OutputVector{ broadcast1, broadcast3 }); + + ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, matcher_name); + this->register_matcher(m, callback); +} + +bool BroadcastTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { + if (!LayerTransformation::canBeTransformed(context, layer)) { + return false; + } + + const auto& dequantization = NetworkHelper::getDequantization(layer, defaultPrecisions); + if (dequantization.empty()) { + return false; + } + + if (dequantization.isPerTensor()) { + return true; + } + + const auto& inputShape = layer->get_input_partial_shape(0); + if (inputShape.rank().is_dynamic() || inputShape[dequantization.channelDimIndex].is_dynamic()) { + return false; + } + + const auto targetShapeConstant = ov::as_type_ptr(layer->get_input_node_shared_ptr(1)); + const auto& targetShape = targetShapeConstant->cast_vector(); + if (targetShape[dequantization.channelDimIndex] != inputShape[dequantization.channelDimIndex].get_length()) { + return false; + } + + const auto axesMappingConstant = ov::as_type_ptr(layer->get_input_node_shared_ptr(2)); + const auto& axesMapping = axesMappingConstant->cast_vector(); + 
if (static_cast(axesMapping[dequantization.channelDimIndex]) != dequantization.channelDimIndex) { + return false; + } + + return true; +} diff --git a/src/common/low_precision_transformations/src/layer_transformation.cpp b/src/common/low_precision_transformations/src/layer_transformation.cpp index a4c0133c5813c3..4ec573c0f2a6ea 100644 --- a/src/common/low_precision_transformations/src/layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/layer_transformation.cpp @@ -401,6 +401,7 @@ std::shared_ptr LayerTransformation::moveDequantizationAfter( const FakeQuantizeDequantization& dequantization, const bool updateOutputPrecision, const bool moveSubtract) const { + OPENVINO_ASSERT(!dequantization.empty()); const auto result = ov::pass::low_precision::NetworkHelper::moveDequantizationAfter(operation, dequantization, updateOutputPrecision, diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index bba12f7e389be8..6435f47d12ffec 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -44,6 +44,7 @@ #include "low_precision/assign_and_read_value.hpp" #include "low_precision/avg_pool.hpp" #include "low_precision/batch_to_space.hpp" +#include "low_precision/broadcast.hpp" #include "low_precision/clamp.hpp" #include "low_precision/convolution.hpp" #include "low_precision/convolution_backprop_data.hpp" @@ -240,6 +241,7 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr() }, // TODO: there are conditions { name() }, + { name() }, + { name() }, { name() }, { name() }, { name() }, @@ -192,6 +195,8 @@ bool ov::pass::low_precision::MarkupPrecisions::isSupported(const std::shared_pt { name() }, { name() }, { name() }, + { name() }, + { name() }, { name() }, { name() }, // ? 
diff --git a/src/common/low_precision_transformations/src/recurrent_cell.cpp b/src/common/low_precision_transformations/src/recurrent_cell.cpp index 7fd40cf2071a0f..cec96044502596 100644 --- a/src/common/low_precision_transformations/src/recurrent_cell.cpp +++ b/src/common/low_precision_transformations/src/recurrent_cell.cpp @@ -1,17 +1,19 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "low_precision/recurrent_cell.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/opsets/opset1.hpp" - #include + #include "openvino/core/node.hpp" #include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset2.hpp" +#include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset5.hpp" +#include "openvino/opsets/opset12.hpp" #include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "low_precision/network_helper.hpp" #include "low_precision/rt_info/disable_cleanup_attribute.hpp" @@ -21,50 +23,14 @@ namespace pass { namespace low_precision { RecurrentCellTransformation::RecurrentCellTransformation(const Params& params) : LayerTransformation(params) { - const auto X = ov::pass::pattern::any_input(); - const auto H = ov::pass::pattern::any_input(); const auto C = ov::pass::pattern::any_input(); const auto S = ov::pass::pattern::any_input(); - const auto W = ov::pass::pattern::wrap_type(); - const auto R = ov::pass::pattern::wrap_type(); const auto B = ov::pass::pattern::wrap_type(); - const auto H_as_const = ov::pass::pattern::wrap_type(); - - const auto fq_X = wrap_fake_quantize(X); - const auto fq_H = wrap_fake_quantize(H); - const auto fq_W = wrap_fake_quantize(W); - const auto fq_R = wrap_fake_quantize(R); - - const auto dequantization_X = wrap_dequantization(ov::pass::pattern::any_input(), true); - const auto dequantization_H = wrap_dequantization(ov::pass::pattern::any_input(), true); - const auto 
dequantization_W = wrap_dequantization(ov::pass::pattern::any_input(), true); - const auto dequantization_R = wrap_dequantization(ov::pass::pattern::any_input(), true); - - const auto dequantization_without_subtract_X = wrap_dequantization(ov::pass::pattern::any_input(), false); - const auto dequantization_without_subtract_H = wrap_dequantization(ov::pass::pattern::any_input(), false); - const auto dequantization_without_subtract_W = wrap_dequantization(ov::pass::pattern::any_input(), false); - const auto dequantization_without_subtract_R = wrap_dequantization(ov::pass::pattern::any_input(), false); - - auto X_in = std::make_shared( - OutputVector{ - fq_X, dequantization_X, dequantization_without_subtract_X - }); - - auto H_in = std::make_shared( - OutputVector{ - H_as_const, fq_H, dequantization_H, dequantization_without_subtract_H - }); - - auto W_in = std::make_shared( - OutputVector{ - fq_W, dequantization_W, dequantization_without_subtract_W - }); - - auto R_in = std::make_shared( - OutputVector{ - fq_R, dequantization_R, dequantization_without_subtract_R - }); + auto X_in = ov::pass::pattern::any_input(); + auto H_in = ov::pass::pattern::any_input(); + auto W_in = ov::pass::pattern::any_input(); + auto R_in = ov::pass::pattern::any_input(); const auto lstm_seq = ov::pass::pattern::wrap_type( {X_in, H_in, C, S, W_in, R_in, B}); @@ -91,8 +57,134 @@ RecurrentCellTransformation::RecurrentCellTransformation(const Params& params) : this->register_matcher(m, callback); } +namespace { + +std::shared_ptr find_fake_quantize_upper(const std::shared_ptr& parent) { + if (auto fq = as_type_ptr(parent)) { + return fq; + } + + if (!NetworkHelper::isPrecisionPreserved(parent)) { + return nullptr; + } + + return find_fake_quantize_upper(parent->get_input_node_shared_ptr(0)); +} + +template +std::string name() { + return Operation::get_type_info_static().name; +} + +bool isSupportedForPerChannelQuantization(const std::shared_ptr& node) { + static const std::unordered_set 
supportedForPerChannelQuantization = { + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() } + }; + + return supportedForPerChannelQuantization.find(node->get_type_name()) != supportedForPerChannelQuantization.end(); +} + +std::vector> get_supported_precisions(std::shared_ptr lstm) { + // pair fields: + // 0 - input number, + // 1 - input type, `element::undefined` - any precision + if (is_type(lstm)) { + return std::vector>{ {0, element::u8}, { 1, element::u8 }, { 4, element::undefined }, { 5, element::undefined } }; + } else if (is_type(lstm)) { + return std::vector>{ {0, element::u8}, { 1, element::u8 }, { 3, element::undefined }, { 4, element::undefined } }; + } + + OPENVINO_THROW("unsupported operation type: ", lstm->get_type_name()); +} + +} // namespace + +void RecurrentCellTransformation::propagate(TransformationContext& context, const std::shared_ptr node) { + if (!isSupportedForPerChannelQuantization(node)) { + return; + } + + const auto& normalized_node = NetworkHelper::separateInStandaloneBranch(node, defaultPrecisions); + auto dequantization = NetworkHelper::getDequantization(node, defaultPrecisions); + if (dequantization.empty()) { + return; + } + const auto& new_node = moveDequantizationAfter(context, normalized_node, dequantization); + + const auto& new_dequantization = NetworkHelper::getDequantizationBelow(new_node); + if (new_dequantization.empty()) { + return; + } + + for (auto output : new_dequantization.multiply->outputs()) { + for (auto input : output.get_target_inputs()) { + auto child = input.get_node()->shared_from_this(); + propagate(context, child); + } + } +} + bool RecurrentCellTransformation::transform(TransformationContext& context, ov::pass::pattern::Matcher& m) { const auto lstm = m.get_match_root(); + const auto inputs = 
get_supported_precisions(lstm); + for (const auto& input : inputs) { + const auto& parent = lstm->get_input_node_shared_ptr(input.first); + if (!isSupportedForPerChannelQuantization(parent)) { + continue; + } + + const auto& fq = find_fake_quantize_upper(parent); + if (fq != nullptr) { + const auto& quantizationDetails = QuantizationDetails::getDetails(fq); + if ((quantizationDetails.inputLowValues.size() != 1) || (quantizationDetails.inputHighValues.size() != 1) || + (quantizationDetails.outputLowValues.size() != 1) || (quantizationDetails.outputHighValues.size() != 1)) { + continue; + } + + const auto& precisionsAttribute = getAttributeFromOutput(fq); + const auto& precisions = precisionsAttribute.empty() ? + defaultPrecisions : + precisionsAttribute.as().value(); + const auto& dataPrecision = getDataPrecision(fq, quantizationDetails, precisions); + if (dataPrecision.empty() || ((input.second != element::undefined) && (dataPrecision.precision != input.second))) { + return false; + } + + auto result = NetworkHelper::decomposeFakeQuantize( + fq, + dataPrecision.precision, + dataPrecision.min, + dataPrecision.max, + dataPrecision.hasZeroPoint, + updatePrecisions); + auto multiply = std::get<1>(result); + + for (const auto& output : multiply->outputs()) { + for (const auto& input : output.get_target_inputs()) { + const auto input_node = input.get_node(); + propagate(context, input_node->shared_from_this()); + } + } + } + } + if (!canBeTransformed(context, lstm)) { return false; } @@ -154,18 +246,21 @@ bool RecurrentCellTransformation::transform(TransformationContext& context, ov:: } bool RecurrentCellTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr lstm) const { - std::shared_ptr W, R; - - if (is_type(lstm)) { - W = lstm->get_input_node_shared_ptr(4); - R = lstm->get_input_node_shared_ptr(5); - } else if (is_type(lstm)) { - W = lstm->get_input_node_shared_ptr(3); - R = lstm->get_input_node_shared_ptr(4); - } else { - return 
false; - } + const auto inputs = get_supported_precisions(lstm); + for (const auto& index : inputs) { + const auto& input = lstm->get_input_node_ptr(index.first); + if (as_type(input) || as_type(input)) { + continue; + } + const auto dequantization = NetworkHelper::getDequantization(lstm, defaultPrecisions, index.first); + if (dequantization.empty()) { + continue; + } + if ((index.second != element::undefined) && (dequantization.data.get_element_type() != index.second)) { + return false; + } + } return true; } diff --git a/src/common/low_precision_transformations/tests/broadcast_transformation.cpp b/src/common/low_precision_transformations/tests/broadcast_transformation.cpp new file mode 100644 index 00000000000000..7745f38143d440 --- /dev/null +++ b/src/common/low_precision_transformations/tests/broadcast_transformation.cpp @@ -0,0 +1,197 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "low_precision/broadcast.hpp" +#include "ov_lpt_models/broadcast.hpp" +#include "simple_low_precision_transformer.hpp" + +namespace { +using namespace ov::pass; +using namespace ov::builder::subgraph; +using namespace ov::opset1; +using namespace ov; + +class BroadcastTransformationTestValues { +public: + class Pattern { + public: + ov::element::Type precisionBeforeDequantization; + ov::builder::subgraph::DequantizationOperations dequantizationBefore; + ov::builder::subgraph::DequantizationOperations dequantizationAfter; + }; + + TestTransformationParams params; + Shape tagetShape; + Shape axesMapping; + Pattern actual; + Pattern expected; +}; + +typedef std::tuple< + ov::PartialShape, + bool, + BroadcastTransformationTestValues> BroadcastTransformationParams; + +class BroadcastTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const 
ov::PartialShape inputShape = std::get<0>(GetParam()); + const bool v1 = std::get<1>(GetParam()); + const BroadcastTransformationTestValues testValues = std::get<2>(GetParam()); + + // batch update support + auto tagetShape = testValues.tagetShape; + tagetShape[0] = inputShape[0].get_length(); + + actualFunction = BroadcastFunction::get( + v1, + inputShape, + testValues.actual.precisionBeforeDequantization, + testValues.actual.dequantizationBefore, + tagetShape, + testValues.axesMapping, + testValues.actual.dequantizationAfter); + + SimpleLowPrecisionTransformer transform; + transform.add(testValues.params); + transform.transform(actualFunction); + + referenceFunction = BroadcastFunction::get( + v1, + inputShape, + testValues.expected.precisionBeforeDequantization, + testValues.expected.dequantizationBefore, + tagetShape, + testValues.axesMapping, + testValues.expected.dequantizationAfter); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const ov::PartialShape inputShape = std::get<0>(obj.param); + const bool v1 = std::get<1>(obj.param); + const BroadcastTransformationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << + v1 << "_" << + inputShape << "_" << + testValues.tagetShape << "_" << + testValues.axesMapping << "_" << + testValues.actual.precisionBeforeDequantization << "_" << + testValues.actual.dequantizationBefore << "_" << + testValues.actual.dequantizationAfter << "_" << + testValues.expected.precisionBeforeDequantization << "_" << + testValues.expected.dequantizationBefore << "_" << + testValues.expected.dequantizationAfter; + return result.str(); + } +}; + +TEST_P(BroadcastTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + + auto res = compare_functions(actualFunction, referenceFunction, true); + ASSERT_TRUE(res.first) << res.second; + + ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique"; +} + 
+namespace hw_broadcast { +const std::vector inputShapes = { + { 1, 3, 1, 1 }, + { 4, 3, 1, 1 }, +}; + +const std::vector testValues = { + { + LayerTransformation::createParamsU8I8(), + { 1, 3, 9, 9}, + { 0, 1, 2, 3 }, + { + ov::element::u8, + {{ov::element::f32}, {0.1f}, {0.2f}}, + {{}, {}, {}}, + }, + { + ov::element::u8, + {{}, {}, {}}, + {{ov::element::f32}, {0.1f}, {0.2f}} + } + }, + { + LayerTransformation::createParamsU8I8(), + { 1, 3, 9, 9 }, + { 0, 1, 2, 3 }, + { + ov::element::u8, + { + {ov::element::f32}, + {{0.1f, 0.2f, 0.3f}}, + {{0.4f, 0.5f, 0.6f}} + } + }, + { + ov::element::u8, + { {}, {}, {}}, + { + {ov::element::f32}, + {{0.1f, 0.2f, 0.3f}}, + {{0.4f, 0.5f, 0.6f}} + } + } + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + BroadcastTransformation, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn({ true, false }), + ::testing::ValuesIn(testValues)), + BroadcastTransformation::getTestCaseName); +} // hw_broadcast + +namespace chw_broadcast { +const std::vector inputShapes = { + { 1, 1, 1, 1 } +}; + +const std::vector testValues = { + { + LayerTransformation::createParamsU8I8(), + { 1, 9, 9, 9}, + { 0, 1, 2, 3 }, + { + ov::element::u8, + {{ov::element::f32}, {0.1f}, {0.2f}}, + {{}, {}, {}}, + }, + { + ov::element::u8, + {{}, {}, {}}, + {{ov::element::f32}, {0.1f}, {0.2f}} + } + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + BroadcastTransformation, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn({ true, false }), + ::testing::ValuesIn(testValues)), + BroadcastTransformation::getTestCaseName); +} // chw_broadcast + +} // namespace diff --git a/src/common/snippets/include/snippets/kernel_executor_table.hpp b/src/common/snippets/include/snippets/kernel_executor_table.hpp index 46f9cd04b923ba..af797e4c80422a 100644 --- a/src/common/snippets/include/snippets/kernel_executor_table.hpp +++ b/src/common/snippets/include/snippets/kernel_executor_table.hpp @@ -43,7 +43,7 @@ class 
KernelExecutorBase { * @brief Update current kernel config in accordance with the passed expression. Corresponding kernel is recompiled if necessary. * This method should be called to update KernelExecutor based on runtime info (e.g. shapes) available through expression ptr */ - virtual void update_by_expression(const lowered::ExpressionPtr& expr) = 0; + virtual void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) = 0; /** * @brief Replace current kernel config with the provided value. Corresponding kernel is recompiled if necessary. * This method should be called to restore a saved state of the executor, that was configured using update_by_expression(). @@ -70,8 +70,8 @@ class KernelExecutor : public KernelExecutorBase { explicit KernelExecutor(Conf c) : KernelExecutorBase(), m_config{std::move(c)} {} // Note: override when final is redundant, but needed to avoid warnings on some compilers - void update_by_expression(const lowered::ExpressionPtr& expr) override final { // NOLINT - update_config(expr, m_config); + void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) override final { // NOLINT + update_config(expr, linear_ir, m_config); OPENVINO_ASSERT(m_config.is_completed(), "Failed to update kernel config in update_by_expression"); update_kernel(m_config, m_kernel); OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor"); @@ -103,7 +103,7 @@ class KernelExecutor : public KernelExecutorBase { protected: /*** Updates stored kernel config based on runtime info from expression (e.g. new input shapes). */ - virtual void update_config(const lowered::ExpressionPtr& expr, Conf& config) const = 0; + virtual void update_config(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir, Conf& config) const = 0; /*** Updates stored kernel in accordance with the passed config. Recompilation of the kernel is * performed if necessary. 
*/ virtual void update_kernel(const Conf& c, std::shared_ptr& kernel) const = 0; @@ -130,9 +130,9 @@ class KernelExecutorTable { return m_table.at(expr); } /*** Updates every registered KernelExecutor in accordance with the corresponding expression */ - void update_state() const { + void update_state(const lowered::LinearIRPtr& linear_ir) const { for (const auto& record : m_table) - record.second->update_by_expression(record.first); + record.second->update_by_expression(record.first, linear_ir); } /*** Returns lambda function that contains current state of the table, and restores this state when called */ diff --git a/src/common/snippets/include/snippets/lowered/loop_info.hpp b/src/common/snippets/include/snippets/lowered/loop_info.hpp index e763f2244d76c6..6be47f49d17ae1 100644 --- a/src/common/snippets/include/snippets/lowered/loop_info.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_info.hpp @@ -430,7 +430,8 @@ class ExpandedLoopInfo : public LoopInfo { ExpandedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const std::vector& exits, std::vector ptr_increments, std::vector final_offsets, std::vector data_sizes, - SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const = false); + SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const = false, + bool evaluate_once = false); /** * @brief Clone LoopInfo with new expressions * @param expr_map map of new and old expressions @@ -474,7 +475,18 @@ class ExpandedLoopInfo : public LoopInfo { * @return const ref of `m_data_sizes` */ const std::vector& get_data_sizes() const; + /** + * @brief Returns True if the current Loop should be executed once + * Otherwise, returns False + * @return `m_evaluance_once` + */ + bool is_evaluate_once() const; + /** + * @brief Set value to `m_evaluance_once` + * @param value - new value of `m_evaluance_once` + */ + void set_evaluate_once(bool value); /** * @brief Update `m_ptr_increments` using copy 
values from `new_values`. * The count of new values must be equal to the count of current increments. @@ -517,6 +529,8 @@ class ExpandedLoopInfo : public LoopInfo { const SpecificLoopIterType m_type = {}; std::shared_ptr m_unified_loop_info = {}; + + bool m_evaluate_once = false; }; using ExpandedLoopInfoPtr = std::shared_ptr; diff --git a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp index 3fc429bec4df1e..2d5c72c06ef983 100644 --- a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp @@ -20,12 +20,6 @@ using PortDescriptorPtr = std::shared_ptr; class PortDescriptor { friend class LinearIRBuilder; public: - // The structure with service values for scheduling parameters - struct ServiceDimensions { - // The value for the subtensor that means that scheduling should be by full dimension - static size_t FULL_DIM; - }; - explicit PortDescriptor(const ov::Input& node, VectorDims subtensor_shape = {}, std::vector layout = {}); @@ -54,6 +48,9 @@ class PortDescriptor { void set_reg_type(RegType type) { m_reg.type = type; } void set_reg_idx(size_t idx) { m_reg.idx = idx; } + // Indexing starts from the end (rbegin() + idx) + void set_subtensor_dim(size_t idx, VectorDims::value_type value); + std::string serialize() const; bool empty() const { return m_layout.empty() && m_subtensor_shape.empty();} PortDescriptorPtr clone() const; @@ -87,6 +84,8 @@ class PortDescriptorUtils { public: static void set_port_descriptor_ptr(const ov::Input& n, const PortDescriptorPtr& desc); static void set_port_descriptor_ptr(const ov::Output& n, const PortDescriptorPtr& desc); + static void set_port_descriptor(const ov::Input& n, std::vector subtensor, std::vector layout = {}); + static void set_port_descriptor(const ov::Output& n, std::vector subtensor, std::vector layout = {}); static PortDescriptorPtr 
get_port_descriptor_ptr(const ov::Input& in); static PortDescriptorPtr get_port_descriptor_ptr(const ov::Input& out); @@ -116,17 +115,6 @@ class PortDescriptorVectorAttribute : public ov::RuntimeAttribute { std::vector outputs{}; }; -template -void set_port_desc(const T& port, std::vector subtensor) { - const auto& shape = port.get_shape(); - for (size_t i = 1; i <= std::min(subtensor.size(), shape.size()); i++) { - auto& dim = subtensor[subtensor.size() - i]; - if (dim != PortDescriptor::ServiceDimensions::FULL_DIM) - dim = std::min(dim, shape[shape.size() - i]); - } - PortDescriptorUtils::set_port_descriptor_ptr(port, std::make_shared(shape, subtensor)); -} - } // namespace lowered } // namespace snippets } // namespace ov diff --git a/src/common/snippets/include/snippets/runtime_configurator.hpp b/src/common/snippets/include/snippets/runtime_configurator.hpp index 059771d961df82..058eca59716d1b 100644 --- a/src/common/snippets/include/snippets/runtime_configurator.hpp +++ b/src/common/snippets/include/snippets/runtime_configurator.hpp @@ -61,7 +61,7 @@ class RuntimeConfigurator { * @param linear_ir LinearIR * @return updated config */ - const std::shared_ptr& get_updated_config(const std::shared_ptr& linear_ir); + const std::shared_ptr& get_updated_config(const lowered::LinearIRPtr& linear_ir); /*** Returns pointer to KernelExecutorTable owned by the config */ const std::shared_ptr& get_kernel_executor_table() const { return m_config->kernel_executor_table; } @@ -70,19 +70,19 @@ class RuntimeConfigurator { * @brief Update RuntimeConfig based on LinearIR * @param linear_ir LinearIR */ - virtual void update(const std::shared_ptr& linear_ir); + virtual void update(const lowered::LinearIRPtr& linear_ir); /** * @brief Allocate and intialize fields in RuntimeConfig and RuntimeConfigurator * @param linear_ir LinearIR */ - virtual void initialization(const std::shared_ptr& linear_ir); + virtual void initialization(const lowered::LinearIRPtr& linear_ir); /** * @brief 
Initializes input and data information of LinearIR: * descriptors (that contains shapes and layouts) and data_sizes * @param linear_ir LinearIR */ - void init_data_info(const std::shared_ptr& linear_ir); + void init_data_info(const lowered::LinearIRPtr& linear_ir); /** * @brief Initializes information of buffers: * - static buffer_scratchpad_size @@ -90,23 +90,23 @@ class RuntimeConfigurator { * - clusters with dynamic buffers (`m_dynamic_buffer_clusters`) for the quick access in `update()` * @param linear_ir LinearIR */ - void init_buffer_info(const std::shared_ptr& linear_ir); + void init_buffer_info(const lowered::LinearIRPtr& linear_ir); /** * @brief Initializes tensor rank of config * @param linear_ir LinearIR */ - virtual void init_tensor_rank(const std::shared_ptr& linear_ir) const; + virtual void init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const; /** * @brief Update Loop informations in LinearIR: Unified and ExpandedLoopInfo * @param linear_ir LinearIR */ - void update_loop_info(const std::shared_ptr& linear_ir) const; + void update_loop_info(const lowered::LinearIRPtr& linear_ir) const; /** * @brief Update Buffer scratchpad size and offsets if needed * Note: `update_loop_info` must be called before * @param linear_ir LinearIR */ - void update_buffer_scratchpad_size(const std::shared_ptr& linear_ir) const; + void update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const; /** * @brief Calculate data offsets of LinearIR and update these values in RuntimeConfig */ diff --git a/src/common/snippets/include/snippets/utils/utils.hpp b/src/common/snippets/include/snippets/utils/utils.hpp index 33eebcffedf68b..869956b5274c60 100644 --- a/src/common/snippets/include/snippets/utils/utils.hpp +++ b/src/common/snippets/include/snippets/utils/utils.hpp @@ -21,6 +21,26 @@ namespace ov { namespace snippets { namespace utils { +/* --- Special values --- */ +template::value || std::is_same::value), bool>::type> +constexpr inline T 
get_dynamic_value() { + return std::numeric_limits::max(); +} +template::value || std::is_same::value), bool>::type> +constexpr inline bool is_dynamic_value(T value) { + return value == get_dynamic_value(); +} + +// This value means full dimension +// For example, for the subtensor it means that scheduling should be by full dimension +constexpr inline size_t get_full_dim_value() { + return get_dynamic_value() - 1; +} +constexpr inline bool is_full_dim_value(size_t value) { + return value == get_full_dim_value(); +} +/* ---------------------- */ + // Get non-scalar Constant count that will be created after FakeQuantize decomposition. // This count is needed to know exact count of non-scalar Constants during tokenization. auto get_non_scalar_constant_count_for_fq(const std::shared_ptr& fq) -> size_t; @@ -59,16 +79,6 @@ inline T div_up(const T a, const U b) { return static_cast((a + b - 1) / b); } -template::value || std::is_same::value), bool>::type> -constexpr inline T get_dynamic_value() { - return std::numeric_limits::max(); -} - -template::value || std::is_same::value), bool>::type> -constexpr inline bool is_dynamic_value(T value) { - return value == get_dynamic_value(); -} - inline bool is_dynamic_vdims(const VectorDims& shape) { return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return is_dynamic_value(v); }); } diff --git a/src/common/snippets/src/lowered/loop_info.cpp b/src/common/snippets/src/lowered/loop_info.cpp index 6f14a52e750feb..d99788fad12946 100644 --- a/src/common/snippets/src/lowered/loop_info.cpp +++ b/src/common/snippets/src/lowered/loop_info.cpp @@ -373,10 +373,10 @@ void UnifiedLoopInfo::add_loop_ports(const std::vector& ports) { ExpandedLoopInfo::ExpandedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const std::vector& exits, std::vector ptr_increments, std::vector final_offsets, std::vector data_sizes, - SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const) + 
SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const, bool evaluate_once) : LoopInfo(work_amount, increment, entries, exits, is_wa_const), m_ptr_increments(std::move(ptr_increments)), m_finalization_offsets(std::move(final_offsets)), - m_data_sizes(std::move(data_sizes)), m_type(type), m_unified_loop_info(std::move(unified_loop_info)) { + m_data_sizes(std::move(data_sizes)), m_type(type), m_unified_loop_info(std::move(unified_loop_info)), m_evaluate_once(evaluate_once) { validate(); } @@ -392,7 +392,8 @@ std::shared_ptr ExpandedLoopInfo::clone_with_new_expr(const Expression const auto& new_output_ports = clone_loop_ports(expr_map, m_output_ports); return std::make_shared(m_work_amount, m_increment, new_input_ports, new_output_ports, - m_ptr_increments, m_finalization_offsets, m_data_sizes, m_type, m_unified_loop_info, m_is_work_amount_const); + m_ptr_increments, m_finalization_offsets, m_data_sizes, m_type, + m_unified_loop_info, m_is_work_amount_const, m_evaluate_once); } bool ExpandedLoopInfo::is_dynamic() const { @@ -435,6 +436,14 @@ const std::vector& ExpandedLoopInfo::get_data_sizes() const { return m_data_sizes; } +bool ExpandedLoopInfo::is_evaluate_once() const { + return m_evaluate_once; +} + +void ExpandedLoopInfo::set_evaluate_once(bool value) { + m_evaluate_once = value; +} + void ExpandedLoopInfo::update_ptr_increments(const std::vector& new_values) { OPENVINO_ASSERT(new_values.size() == m_ptr_increments.size(), "Failed to update ptr_increments: incompatible counts"); m_ptr_increments.assign(new_values.cbegin(), new_values.end()); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 3e07ec850927ab..09f8ccb94b9660 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -160,7 +160,6 @@ void LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, void 
LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, size_t vector_size) { - const auto FULL_DIM = PortDescriptor::ServiceDimensions::FULL_DIM; std::vector loop_input_ports, loop_output_ports; LoopManager::get_io_loop_ports(loop_begin_pos, loop_end_pos, loop_input_ports, loop_output_ports); @@ -178,8 +177,8 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, "Failed to broadcast work amount in marking loop"); }; - auto is_outside_loop = [&FULL_DIM](const std::vector& subtensor) { - return std::all_of(subtensor.begin(), subtensor.end(), [&FULL_DIM](size_t lhs) { return lhs == FULL_DIM; }); + auto is_outside_loop = [](const std::vector& subtensor) { + return std::all_of(subtensor.begin(), subtensor.end(), utils::is_full_dim_value); }; std::vector loop_subtensor; @@ -192,7 +191,7 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, subtensor[subtensor.size() - 1] = vector_size; } - const size_t resizing_value = is_outside_loop(subtensor) ? FULL_DIM : 1; + const size_t resizing_value = is_outside_loop(subtensor) ? 
utils::get_full_dim_value() : 1; while (subtensor.size() < loop_depth) subtensor.insert(subtensor.begin(), resizing_value); if (loop_subtensor.empty()) @@ -202,7 +201,7 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, "Incorrect scheduling parameters for loop"); for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { - if (*(subtensor.rbegin() + dim_idx) != FULL_DIM) { + if (!utils::is_full_dim_value(*(subtensor.rbegin() + dim_idx))) { broadcast(loop_tensor, shape, dim_idx); } } @@ -211,7 +210,7 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { OPENVINO_ASSERT(dim_idx < loop_subtensor.size(), "Incorrect indexes of Loop for markup"); const auto& subtensor_value = *(loop_subtensor.rbegin() + dim_idx); - if (subtensor_value == FULL_DIM) { + if (utils::is_full_dim_value(subtensor_value)) { continue; } diff --git a/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp b/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp index e4664800995db1..028cdde1088e60 100644 --- a/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp +++ b/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp @@ -60,7 +60,7 @@ size_t ComputeBufferAllocationSize::get_allocation_size(const LoopManagerPtr& lo const auto processing_rank = !processed_dim_idxs.empty() ? 
std::max(*processed_dim_idxs.rbegin(), subtensor.size()) : subtensor.size(); for (size_t i = 0; i < std::min(processing_rank, rank); ++i) { if (processed_dim_idxs.count(i) == 0) { - if (i < subtensor.size()) + if (i < subtensor.size() && !utils::is_full_dim_value(*(subtensor.rbegin() + i))) allocation_size = utils::dynamic_safe_mul(allocation_size, std::min(*(planar_shape.rbegin() + i), *(subtensor.rbegin() + i))); else allocation_size = utils::dynamic_safe_mul(allocation_size, *(planar_shape.rbegin() + i)); diff --git a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp index 2ef872ba4ad262..dcff90015d28f2 100644 --- a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp +++ b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp @@ -167,6 +167,7 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir, LinearIR::constExp if (is_decomposed_loop_needed(unified_loop_info, iter_type, remaining_work_amount)) { const auto work_amount = get_decomposed_loop_work_amount(unified_loop_info, iter_type, remaining_work_amount); const auto increment = get_decomposed_loop_increment(unified_loop_info, iter_type, remaining_work_amount); + const auto evaluate_once = !utils::is_dynamic_value(work_amount) && work_amount == increment; // Update remaining Loop work amount // Note: if work_amount is unknown and increment = 1, it means that a loop will iterate by whole work_amount if (!is_wa_dynamic || increment == 1) { @@ -199,7 +200,7 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir, LinearIR::constExp const auto decomposed_loop_info = std::make_shared(work_amount, increment, decomposed_loop_entry_ports, decomposed_loop_exit_ports, decomposed_ptr_increments, decomposed_finalization_offsets, - decomposed_data_sizes, iter_type, unified_loop_info); + decomposed_data_sizes, iter_type, unified_loop_info, false, evaluate_once); 
init_decomposed_loop(linear_ir, decomposed_loop_begin_it, decomposed_loop_end_it, decomposed_loop_info, loop_id, decomposed_loop_end); decomposed = true; diff --git a/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp b/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp index c19bf7d65a2fef..c6255d90106e77 100644 --- a/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp +++ b/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp @@ -4,6 +4,7 @@ #include "snippets/lowered/pass/optimize_loop_single_evaluation.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/lowered/linear_ir.hpp" #include "snippets/op/loop.hpp" #include "snippets/utils/utils.hpp" @@ -16,30 +17,31 @@ namespace pass { bool OptimizeLoopSingleEvaluation::run(lowered::LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::OptimizeLoopSingleEvaluation") + const auto& loop_manager = linear_ir.get_loop_manager(); + bool is_modified = false; for (auto expr_it = begin; expr_it != end; ++expr_it) { const auto& expr = *expr_it; if (auto loop_end = ov::as_type_ptr(expr->get_node())) { - // *1* solo vector/tail loop + empty outer loop - // => skip increments (both counter & ptr) : set evaluate_once flag - // *2* solo vector/tail loop + non-empty outer loop - // => skip counter increments but perform ptr increments : set evaluate_once, - // and perform pointer increments through finalization offsets - // *3* vector loop(s) + one tail loop - // => vector as usual, tail depends on outer loop, see *1* and *2* - if (loop_end->has_dynamic_params() || loop_end->get_work_amount() >= 2 * loop_end->get_increment()) - continue; - - auto new_finalization_offsets = loop_end->get_finalization_offsets(); - const auto& ptr_increments = loop_end->get_ptr_increments(); - const auto work_amount_incr = 
static_cast(loop_end->get_increment()); - for (size_t i = 0; i < new_finalization_offsets.size(); i++) { - new_finalization_offsets[i] += ptr_increments[i] * work_amount_incr; + const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id()); + if (loop_info->is_evaluate_once()) { + auto new_finalization_offsets = loop_end->get_finalization_offsets(); + const auto& ptr_increments = loop_end->get_ptr_increments(); + const auto work_amount_incr = static_cast(loop_end->get_increment()); + for (size_t i = 0; i < new_finalization_offsets.size(); i++) { + const auto ptr_shift = utils::dynamic_safe_mul(ptr_increments[i], work_amount_incr); + new_finalization_offsets[i] = utils::dynamic_safe_add(new_finalization_offsets[i], ptr_shift); + } + loop_end->set_finalization_offsets(new_finalization_offsets); + loop_end->set_ptr_increments(std::vector(new_finalization_offsets.size(), 0)); + loop_end->set_evaluate_once(true); + + // Update the corresponding ExpandedLoopInfo + loop_info->update_ptr_increments(loop_end->get_ptr_increments()); + loop_info->update_finalization_offsets(loop_end->get_finalization_offsets()); + + is_modified = true; } - loop_end->set_finalization_offsets(new_finalization_offsets); - loop_end->set_ptr_increments(std::vector(new_finalization_offsets.size(), 0)); - loop_end->set_evaluate_once(true); - is_modified = true; } } return is_modified; diff --git a/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp b/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp index b58de6790c23a4..c89274a728c4c9 100644 --- a/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp @@ -15,14 +15,43 @@ namespace snippets { namespace lowered { namespace pass { namespace { + +// The algorithm uses the following special values in subtensors/shapes: +// 1. Dynamic value in subtensor/shape : SIZE_MAX +// 2. Full dimension in subtensor : SIZE_MAX - 1 +// 3. 
Default value of `new_dim_value` : SIZE_MAX - 2 +// 4. `Forced` special dynamic value : SIZE_MAX - 3 +// +// We have to introduce `FORCED_DYNAMIC_VALUE` to distinguish `new_dim_value = DYNAMIC` +// from the real dynamic values in subtensors and shapes and force this value in subtensors. +// For example, there is Brgemm with the following info in the tail Loop: +// Input 0: shape [?, ?], existing subtensor [32, FULL_DIM] +// Input 1: shape [?, ?], existing subtensor [FULL_DIM, FULL_DIM] +// Output : shape [?, ?], existing subtensor [32, FULL_DIM] +// If the user wants to force `?` in the place of `32` in subtensors, the steps will be: +// 1. Set `?` to subtensor and shape of Input 0 : +// shape [?, ?] (shape has not been changed!), new subtensor [?, FULL_DIM] +// 2. Make shape inference of Brgemm and get Output: +// shape [?, ?] (shape has not been changed!), existing subtensor [FULL_DIM, FULL_DIM] +// 3. Update subtensor on output using shape: +// new_subtensor[i] = std::min(planar_shape[i], subtensor[i]); // i = 0: std::min(SIZE_MAX(?), 32) +// new subtensor [32, FULL_DIM] - has not been changed! But should be [?, FULL_DIM] +// Conclusion: we have to distinguish forced dynamic value with existing dynamic values in shape and subtensor + +constexpr size_t NEW_DEFAULT_VALUE = SIZE_MAX - 2; +constexpr size_t FORCED_DYNAMIC_VALUE = SIZE_MAX - 3; + void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const LoopInfoPtr& loop_info, LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end, bool most_outer_loop, - const size_t new_dim_value = SIZE_MAX) { - OPENVINO_ASSERT(snippets::utils::implication(most_outer_loop, new_dim_value != SIZE_MAX), + size_t new_dim_value = NEW_DEFAULT_VALUE) { + // Marks the forced dynamic value + new_dim_value = utils::is_dynamic_value(new_dim_value) ? 
FORCED_DYNAMIC_VALUE : new_dim_value; + OPENVINO_ASSERT(snippets::utils::implication(most_outer_loop, new_dim_value != NEW_DEFAULT_VALUE), "if the updated subtensor propagation was called for the outer loop, new_dim_value must not be equal to default value"); + std::map original_shapes; // First step: set new dim value to the corresponding input_ports' dimensions if (most_outer_loop) { @@ -32,9 +61,8 @@ void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const auto& expr = port.expr_port->get_expr(); const auto& desc = port.expr_port->get_descriptor_ptr(); auto subtensor = desc->get_subtensor(); - if (port.dim_idx < subtensor.size()) { - *(subtensor.rbegin() + port.dim_idx) = new_dim_value; - desc->set_subtensor(subtensor); + if (port.dim_idx < desc->get_subtensor().size()) { + desc->set_subtensor_dim(port.dim_idx, new_dim_value); } const auto parent_desc = expr->get_input_port_connector(port.expr_port->get_index())->get_source().get_descriptor_ptr(); @@ -78,7 +106,9 @@ void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const size_t subtensor_start = planar_dims.size() - subtensor.size(); VectorDims new_subtensor(planar_dims.begin() + subtensor_start, planar_dims.end()); for (size_t i = 0; i < new_subtensor.size(); ++i) { - new_subtensor[i] = std::min(new_subtensor[i], subtensor[i]); + // If user forces dynamic value to set in subtensor, set real dynamic dimension using `get_dynamic_value()` + new_subtensor[i] = new_subtensor[i] == FORCED_DYNAMIC_VALUE ? utils::get_dynamic_value() : + utils::is_full_dim_value(subtensor[i]) ? 
subtensor[i] : std::min(new_subtensor[i], subtensor[i]); } desc->set_subtensor(new_subtensor); } diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index 3280be29973b69..e5fd3638e831c8 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -9,8 +9,6 @@ namespace ov { namespace snippets { namespace lowered { -size_t PortDescriptor::ServiceDimensions::FULL_DIM = SIZE_MAX; - PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} @@ -53,6 +51,11 @@ void PortDescriptor::set_shape(const VectorDims& tensor) { *m_tensor_shape = tensor; } +void PortDescriptor::set_subtensor_dim(size_t idx, VectorDims::value_type value) { + OPENVINO_ASSERT(idx < m_subtensor_shape.size(), "Failed to set subtensor value: idx should be less than size"); + *(m_subtensor_shape.rbegin() + idx) = value; +} + PortDescriptorPtr PortDescriptor::clone() const { auto desc = std::make_shared(*m_tensor_shape, m_subtensor_shape, m_layout); desc->set_reg(m_reg); @@ -130,6 +133,26 @@ void PortDescriptorUtils::set_port_descriptor_ptr(const ov::Output& ou } } +namespace { +template +void set_port_desc(const T& port, std::vector subtensor, std::vector layout) { + const auto& shape = port.get_shape(); + for (size_t i = 1; i <= std::min(subtensor.size(), shape.size()); i++) { + auto& dim = subtensor[subtensor.size() - i]; + if (!utils::is_full_dim_value(dim)) + dim = std::min(dim, shape[shape.size() - i]); + } + PortDescriptorUtils::set_port_descriptor_ptr(port, std::make_shared(shape, subtensor, layout)); +} +} // namespace + +void PortDescriptorUtils::set_port_descriptor(const ov::Input& in, std::vector subtensor, std::vector layout) { + set_port_desc(in, subtensor, layout); +} +void 
PortDescriptorUtils::set_port_descriptor(const ov::Output& in, std::vector subtensor, std::vector layout) { + set_port_desc(in, subtensor, layout); +} + PortDescriptorPtr PortDescriptorUtils::get_port_descriptor_ptr(const ov::Input& in) { return get_port_descriptor_ptr(ov::Input(in.get_node(), in.get_index())); } diff --git a/src/common/snippets/src/op/reduce.cpp b/src/common/snippets/src/op/reduce.cpp index 5717bfe1255300..b0b69e0bd7e84c 100644 --- a/src/common/snippets/src/op/reduce.cpp +++ b/src/common/snippets/src/op/reduce.cpp @@ -5,6 +5,7 @@ #include "snippets/op/reduce.hpp" #include "snippets/itt.hpp" +#include "snippets/utils/utils.hpp" #include "snippets/lowered/port_descriptor.hpp" namespace ov { @@ -33,7 +34,7 @@ void ReduceBase::compute_and_set_reduce_subtensors(const std::shared_ptr subtensor(reduce_rank, 1); for (size_t i = axis; i < reduce_rank; ++i) - subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM; + subtensor[i] = utils::get_full_dim_value(); lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->input(0), std::make_shared(reduce->input(0), subtensor)); lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->output(0), std::make_shared(reduce->output(0), subtensor)); } diff --git a/src/common/snippets/src/op/serialization_node.cpp b/src/common/snippets/src/op/serialization_node.cpp index cb17e8a57ddf24..9864a1a12f94a5 100644 --- a/src/common/snippets/src/op/serialization_node.cpp +++ b/src/common/snippets/src/op/serialization_node.cpp @@ -49,7 +49,8 @@ bool SerializationNode::visit_attributes(AttributeVisitor &visitor) { std::stringstream ss; for (size_t i = 0; i < subtensor.size(); ++i) { const auto& v = subtensor[i]; - const auto v_str = (v == lowered::PortDescriptor::ServiceDimensions::FULL_DIM) ? "FULL_DIM" : std::to_string(v); + const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" : + utils::is_dynamic_value(v) ? "?" : std::to_string(v); const auto del = i < subtensor.size() - 1 ? 
", " : ""; ss << v_str << del; } diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index a33d478ee3929d..4ede0b58a66cf0 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -552,7 +552,7 @@ snippets::Schedule Subgraph::generate(const void* compile_params) const { exec_table->replace_key_expression(expression_map.at(expr.get()), expr); // Some kernel executors might've been registered during code emission. // We need to update them, so appropriate kernels will be compiled. - exec_table->update_state(); + exec_table->update_state(m_linear_ir); return {std::move(lowering_result)}; } diff --git a/src/common/snippets/src/pass/matmul_to_brgemm.cpp b/src/common/snippets/src/pass/matmul_to_brgemm.cpp index 6eaf8424ff5a78..7268d4a7cc6a67 100644 --- a/src/common/snippets/src/pass/matmul_to_brgemm.cpp +++ b/src/common/snippets/src/pass/matmul_to_brgemm.cpp @@ -18,16 +18,12 @@ namespace snippets { namespace pass { void MatMulToBrgemm::init_ports(const std::shared_ptr& brgemm) const { - auto get_subtensor = []() { - return std::vector{ lowered::PortDescriptor::ServiceDimensions::FULL_DIM, lowered::PortDescriptor::ServiceDimensions::FULL_DIM }; - }; + const auto subtensor = std::vector(2, utils::get_full_dim_value()); for (const auto& input : brgemm->inputs()) { const auto& tensor = utils::pshape_to_vdims(input.get_partial_shape()); - const auto& subtensor = get_subtensor(); lowered::PortDescriptorUtils::set_port_descriptor_ptr(input, std::make_shared(tensor, subtensor)); } const auto& tensor = utils::pshape_to_vdims(brgemm->get_output_partial_shape(0)); - const auto& subtensor = get_subtensor(); lowered::PortDescriptorUtils::set_port_descriptor_ptr(brgemm->output(0), std::make_shared(tensor, subtensor)); } diff --git a/src/common/snippets/src/pass/softmax_decomposition.cpp b/src/common/snippets/src/pass/softmax_decomposition.cpp index 269d06c958dd39..34dc1c19c5d9d0 100644 --- 
a/src/common/snippets/src/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/pass/softmax_decomposition.cpp @@ -55,7 +55,7 @@ SoftmaxDecomposition::SoftmaxDecomposition() { OPENVINO_ASSERT(axis < rank, "Softmax has incorrect axis"); std::vector subtensor(rank, 1); for (size_t i = axis; i < rank; ++i) - subtensor[i] = PortDescriptor::ServiceDimensions::FULL_DIM; + subtensor[i] = utils::get_full_dim_value(); PortDescriptorUtils::set_port_descriptor_ptr(power->input(0), std::make_shared(power->input(0), subtensor)); PortDescriptorUtils::set_port_descriptor_ptr(power->output(0), std::make_shared(power->output(0), subtensor)); diff --git a/src/common/snippets/src/runtime_configurator.cpp b/src/common/snippets/src/runtime_configurator.cpp index c3db1864bf1135..6f8945649c2b94 100644 --- a/src/common/snippets/src/runtime_configurator.cpp +++ b/src/common/snippets/src/runtime_configurator.cpp @@ -35,7 +35,7 @@ RuntimeConfigurator::RuntimeConfigurator(std::shared_ptr c) : OPENVINO_ASSERT(m_config, "Runtime config is nullptr!"); } -const std::shared_ptr& RuntimeConfigurator::get_updated_config(const std::shared_ptr& linear_ir) { +const std::shared_ptr& RuntimeConfigurator::get_updated_config(const lowered::LinearIRPtr& linear_ir) { // First initialization if (m_io_num == 0) initialization(linear_ir); @@ -44,7 +44,7 @@ const std::shared_ptr& RuntimeConfigurator::get_updated_config(co return m_config; } -void RuntimeConfigurator::initialization(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::initialization(const lowered::LinearIRPtr& linear_ir) { init_data_info(linear_ir); init_tensor_rank(linear_ir); init_buffer_info(linear_ir); @@ -55,7 +55,7 @@ void RuntimeConfigurator::initialization(const std::shared_ptrtile_rank = linear_ir->get_config().m_loop_depth; } -void RuntimeConfigurator::update(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::update(const lowered::LinearIRPtr& linear_ir) { if (linear_ir->is_dynamic()) { 
update_loop_info(linear_ir); update_buffer_scratchpad_size(linear_ir); @@ -67,11 +67,11 @@ void RuntimeConfigurator::update(const std::shared_ptr& linea update_latest_shapes(); } -void RuntimeConfigurator::init_tensor_rank(const std::shared_ptr& linear_ir) const { +void RuntimeConfigurator::init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const { m_config->tensor_rank = linear_ir->get_master_shape().size(); } -void RuntimeConfigurator::init_data_info(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::init_data_info(const lowered::LinearIRPtr& linear_ir) { const auto& parameters = linear_ir->get_parameters(); const auto& results = linear_ir->get_results(); m_in_num = parameters.size(); @@ -113,7 +113,7 @@ void RuntimeConfigurator::init_data_info(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRPtr& linear_ir) { std::map> dynamic_buffer_clusters, static_buffer_clusters; // All needed checks are in Validate pass @@ -143,7 +143,7 @@ void RuntimeConfigurator::init_buffer_info(const std::shared_ptr& linear_ir) const { +void RuntimeConfigurator::update_loop_info(const lowered::LinearIRPtr& linear_ir) const { // Initialized UnifiedLoopInfo struct CurrentUnifiedLoopInfo { size_t current_work_amount = 0; @@ -180,21 +180,27 @@ void RuntimeConfigurator::update_loop_info(const std::shared_ptrset_work_amount( - lowered::pass::InsertSpecificIterations::get_decomposed_loop_work_amount(current_unified_loop_info, decomposed_loop_type, current_work_amount)); + const auto work_amount = + lowered::pass::InsertSpecificIterations::get_decomposed_loop_work_amount(current_unified_loop_info, decomposed_loop_type, current_work_amount); + expanded_loop_info->set_work_amount(work_amount); // Update remaining Loop work amount - current_work_amount -= expanded_loop_info->get_work_amount(); - - expanded_loop_info->update_ptr_increments(ptr_increments); - if (current_work_amount > 0) { - 
expanded_loop_info->update_finalization_offsets(std::vector(finalization_offsets.size(), 0)); + current_work_amount -= work_amount; + + // Update only `finalization offsets`. `Ptr increments` are always zeroed in this case + auto updated_finalization_offsets = current_work_amount > 0 ? std::vector(finalization_offsets.size(), 0) : finalization_offsets; + if (expanded_loop_info->is_evaluate_once()) { + expanded_loop_info->set_increment(work_amount); + // work_amount is equal to increment in cases with `evaluate_once` + for (size_t i = 0; i < updated_finalization_offsets.size(); ++i) + updated_finalization_offsets[i] += ptr_increments[i] * work_amount; } else { - expanded_loop_info->update_finalization_offsets(finalization_offsets); + expanded_loop_info->update_ptr_increments(ptr_increments); } + expanded_loop_info->update_finalization_offsets(updated_finalization_offsets); } } -void RuntimeConfigurator::update_buffer_scratchpad_size(const std::shared_ptr& linear_ir) const { +void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const { const auto& loop_manager = linear_ir->get_loop_manager(); m_config->buffer_scratchpad_size = linear_ir->get_static_buffer_scratchpad_size(); diff --git a/src/common/snippets/tests/include/lir_test_utils.hpp b/src/common/snippets/tests/include/lir_test_utils.hpp index 2f687f6e1412d1..b653c86af8ab0b 100644 --- a/src/common/snippets/tests/include/lir_test_utils.hpp +++ b/src/common/snippets/tests/include/lir_test_utils.hpp @@ -44,38 +44,6 @@ void init_expr_descriptors(const ov::snippets::lowered::ExpressionPtr& expr, const std::vector& subtensors = {}, const std::vector& layouts = {}); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map - * @attention This helper wraps LoopManager::mark_loop method, but only for LoopInfo creation (whereas original - * mark_loop method also marks expressions with the corresponding loop info). 
- * @param linear_ir linear_ir in which loop info should be added - * @param entries entry points of loop - * @param exits exit points of loop - */ -void create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map. - * Meanwhile set loop id to expr range [loop_begin_pos, loop_end_pos). - * @attention This helper wraps LoopManager::mark_loop method, which also marks expressions with the corresponding loop info - * @param linear_ir linear_ir in which loop info should be added - * @param loop_begin_pos begin expr postion in this loop - * @param loop_end_pos end expr postion in this loop - * @param entries entry points of loop - * @param exits exit points of loop - */ -void create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lir_test_utils.cpp b/src/common/snippets/tests/src/lir_test_utils.cpp index 274480fcd84c85..c4f5047011cd08 100644 --- a/src/common/snippets/tests/src/lir_test_utils.cpp +++ b/src/common/snippets/tests/src/lir_test_utils.cpp @@ -39,9 +39,7 @@ void LoweredPassTestsF::TearDown() { } ov::snippets::VectorDims get_default_subtensor() { - static const VectorDims default_subtensor{PortDescriptor::ServiceDimensions::FULL_DIM, - PortDescriptor::ServiceDimensions::FULL_DIM}; - return default_subtensor; + return VectorDims(2, ov::snippets::utils::get_full_dim_value()); } void init_expr_descriptors(const 
ov::snippets::lowered::ExpressionPtr& expr, @@ -85,28 +83,6 @@ void init_expr_descriptors(const ov::snippets::lowered::ExpressionPtr& expr, } } -void create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { - // Equal begin and end iterators are set to avoid expressions marking with new loop id - create_and_add_unified_loop_info(linear_ir, linear_ir->begin(), linear_ir->begin(), work_amount, increment, entries, exits, set_default_handlers); -} - -void create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { - const auto& loop_manager = linear_ir->get_loop_manager(); - loop_manager->mark_loop(loop_begin_pos, loop_end_pos, work_amount, increment, entries, exits, set_default_handlers); -} - } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp index e56a31a8e92a4c..4dc3f2dae7e867 100644 --- a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp +++ b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp @@ -95,8 +95,7 @@ void BufferAllocationTest::Validate() { std::shared_ptr EltwiseBufferAllocationTest::GetModel() const { const auto subtensor_eltwise = std::vector{1, m_vector_size}; - const auto subtensor_buffer = std::vector{ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM, - ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM}; + const auto subtensor_buffer = std::vector(2, ov::snippets::utils::get_full_dim_value()); const auto parameter0 = 
std::make_shared(ov::element::f32, ov::PartialShape({1, 3, 100, 100})); const auto parameter1 = std::make_shared(ov::element::f32, ov::PartialShape({1, 3, 100, 100})); diff --git a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp index c3f4f5ea7f6877..ee762f4bfca746 100644 --- a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp +++ b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp @@ -66,11 +66,11 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { auto result = linear_ir->push_node(sub.second); auto begin = multiply.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*multiply.first)->get_input_port(1)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0)), + LoopPort((*multiply.first)->get_input_port(1)), + LoopPort((*sub.first)->get_input_port(0))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); linear_ir->set_loop_depth(1); } { @@ -85,10 +85,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { auto result = linear_ir_ref->push_node(sub.second); auto begin = sub.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir_ref->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(1))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); } } @@ -124,10 +124,10 @@ 
TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { auto result = linear_ir->push_node(sub.second); auto begin = scalar.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(0))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); linear_ir->set_loop_depth(1); } { @@ -142,10 +142,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { auto result = linear_ir_ref->push_node(sub.second); auto begin = sub.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir_ref->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(1))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); } } @@ -187,20 +187,20 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed auto result1 = linear_ir->push_node(sub.second); auto begin = multiply.first; auto end = result1.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 16, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, 
begin, end, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 16, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 0), + LoopPort((*multiply.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*sub.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0), + LoopPort((*sub.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 3, 1, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 1), + LoopPort((*multiply.first)->get_input_port(1), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*sub.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1), + LoopPort((*sub.first)->get_output_port(0), true, 1)}); linear_ir->set_loop_depth(2); } { @@ -218,21 +218,21 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed auto result1 = linear_ir_ref->push_node(sub.second); auto begin_inner = add.first; auto end_inner = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_inner, end_inner, 16, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); + linear_ir_ref->get_loop_manager()->mark_loop(begin_inner, end_inner, 16, vector_size, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 0), + 
LoopPort((*add.first)->get_input_port(1), true, 0), + LoopPort((*sub.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0), + LoopPort((*sub.first)->get_output_port(0), true, 0)}); auto begin_outer = multiply.first; auto end_outer = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_outer, end_outer, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(begin_outer, end_outer, 3, 1, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 1), + LoopPort((*multiply.first)->get_input_port(1), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*sub.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1), + LoopPort((*sub.first)->get_output_port(0), true, 1)}); } } @@ -263,14 +263,14 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOuts auto add = linear_ir->push_node(param_0.second, broadcastmove.second); init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir->push_node(add.second); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 3, 1, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 512, vector_size, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0)}, - 
{LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(broadcastmove.first, result.first, 3, 1, + std::vector{LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(broadcastmove.first, result.first, 512, vector_size, + std::vector{LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0)}); linear_ir->set_loop_depth(2); } { @@ -281,14 +281,14 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOuts auto add = linear_ir_ref->push_node(param_0.second, broadcastmove.second); init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir_ref->push_node(add.second); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 3, 1, - {LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(1), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 512, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir_ref->get_loop_manager()->mark_loop(add.first, result.first, 3, 1, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*add.first)->get_input_port(1), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(add.first, result.first, 512, vector_size, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + 
std::vector{LoopPort((*add.first)->get_output_port(0), true, 0)}); } } @@ -356,31 +356,31 @@ TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLo init_expr_descriptors(*multiply.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir->push_node(multiply.second); // 3 inner loop - create_and_add_unified_loop_info(linear_ir, max.first, hmax.first, 1, vector_size, - {LoopPort((*max.first)->get_input_port(0), true, 0), - LoopPort((*max.first)->get_input_port(1), true, 0)}, - {LoopPort((*max.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, sub.first, hsum.first, 1, vector_size, - {LoopPort((*sub.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*exp.first)->get_output_port(0), true, 0), - LoopPort((*add.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, multiply.first, result.first, 1, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(max.first, hmax.first, 1, vector_size, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 0), + LoopPort((*max.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*max.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(sub.first, hsum.first, 1, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0), true, 0), + LoopPort((*sub.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*exp.first)->get_output_port(0), true, 0), + LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(multiply.first, result.first, 1, vector_size, + 
std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 0), + LoopPort((*multiply.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 0)}); // outer loop info const auto loop_begin = std::make_shared(); auto loop_begin_expr = linear_ir->insert_node(loop_begin, std::vector{}, {}, false, max.first); const auto loop_end = std::make_shared(); std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; auto loop_end_expr = linear_ir->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(loop_begin_expr, result.first, 10, 1, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 1), + LoopPort((*max.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 1)}); loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); linear_ir->set_loop_depth(2); } @@ -409,11 +409,11 @@ TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLo const auto loop_end = std::make_shared(); std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; auto loop_end_expr = linear_ir_ref->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir_ref, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); + 
linear_ir_ref->get_loop_manager()->mark_loop(loop_begin_expr, result.first, 10, 1, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 1), + LoopPort((*max.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 1)}); loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); } } diff --git a/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp b/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp index 8dbcf7ba285f5b..502d89cb205aeb 100644 --- a/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp +++ b/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp @@ -36,18 +36,21 @@ TSConcatForward::TSConcatForward() { return false; } - if (concat_node->get_output_partial_shape(0).is_dynamic()) { - return false; + auto concat_axis = concat_node->get_axis(); + if (concat_axis < 0) { + if (concat_node->get_output_partial_shape(0).rank().is_dynamic()) { + return false; + } + const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); + concat_axis = ov::util::normalize(concat_axis, rank); } + // todo: support dyn rank case bool updated = sink_forward::UpdateInputTransposes(main_node, transpose_info); if (!updated) { return false; } - const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); - const auto concat_axis = ov::util::normalize(concat_node->get_axis(), rank); - const auto transpose_axis_order = transpose_info.transpose_const->get_axis_vector_val(); const int64_t transposed_concat_axis = transpose_axis_order[concat_axis]; concat_node->set_axis(transposed_concat_axis); @@ -83,12 +86,19 @@ TSConcatBackward::TSConcatBackward() { } auto concat_node = as_type_ptr(main_node); - if (concat_node->get_output_partial_shape(0).is_dynamic()) { + if (!concat_node) { return false; } - const auto rank = 
concat_node->get_output_partial_shape(0).rank().get_length(); - auto concat_axis = ov::util::normalize(concat_node->get_axis(), rank); + auto concat_axis = concat_node->get_axis(); + if (concat_axis < 0) { + if (concat_node->get_output_partial_shape(0).rank().is_dynamic()) { + return false; + } + + const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); + concat_axis = ov::util::normalize(concat_axis, rank); + } const auto transpose_axis_order = transpose_const->get_axis_vector_val(); const auto reversed_transpose_axis_order = ReverseTransposeOrder(transpose_axis_order); diff --git a/src/core/src/runtime/itensor.cpp b/src/core/src/runtime/itensor.cpp index 203297c671d401..b1b517426b9f67 100644 --- a/src/core/src/runtime/itensor.cpp +++ b/src/core/src/runtime/itensor.cpp @@ -16,6 +16,21 @@ namespace ov { +namespace { +Strides default_byte_strides(const Shape& shape, const element::Type& et) { + auto strides = Strides(shape.size()); + if (!strides.empty()) { + strides.back() = et.size(); + std::transform(shape.crbegin(), + shape.crend() - 1, + strides.rbegin(), + strides.rbegin() + 1, + std::multiplies()); + } + return strides; +} +} // namespace + ITensor::~ITensor() = default; size_t ITensor::get_size() const { @@ -31,31 +46,13 @@ bool ITensor::is_continuous() const { // OpenVINO doesn't support strides for lp types return true; } - const auto& shape = get_shape(); - const auto& type = get_element_type(); - std::vector strides(shape.size()); - if (!shape.empty()) { - strides[shape.size() - 1] = 1; - } - auto size = shape.size(); - for (size_t i = 1; i < size; i++) { - strides[size - i - 1] = strides[size - i] * shape[size - i]; - } - - ov::Strides byte_strides(strides.size()); - for (size_t i = 0; i < strides.size(); ++i) - byte_strides[i] = strides[i] * type.size(); - return byte_strides == get_strides(); + return default_byte_strides(get_shape(), get_element_type()) == get_strides(); } void ITensor::copy_to(const std::shared_ptr& dst) 
const { const auto& is_scalar = [](const ov::Shape& shape) { return shape.empty() || (shape.size() == 1 && shape[0] == 1); }; - const auto shapes_equal = [is_scalar](const ov::Shape& src, const ov::Shape& dst) { - // WA for scalar tensors to copy {1} to {} or otherwise - return src == dst || (is_scalar(src) && is_scalar(dst)); - }; OPENVINO_ASSERT(dst, "Destination tensor was not initialized."); OPENVINO_ASSERT(!dynamic_cast(this), "Default copy to doesn't support copy from remote tensor."); @@ -68,16 +65,11 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { dst->get_element_type(), ")"); - if (dst->get_shape() == ov::Shape{0}) - dst->set_shape(get_shape()); - - OPENVINO_ASSERT(shapes_equal(get_shape(), dst->get_shape()), - "Tensor shapes are not equal. (src: ", - get_shape(), - " != dst: ", - dst->get_shape(), - ")"); const auto& shape = get_shape(); + if (shape != dst->get_shape()) { + dst->set_shape(shape); + } + auto* src_data = static_cast(data()); auto* dst_data = static_cast(dst->data()); ov::Strides src_strides{get_byte_size()}; @@ -86,25 +78,15 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { ov::Shape max_pos{1}; if (get_element_type().bitwidth() < 8 || (get_strides() == dst->get_strides() && is_continuous()) || - (is_scalar(get_shape()) && is_scalar(dst->get_shape()))) { + (is_scalar(shape) && is_scalar(dst->get_shape()))) { // OpenVINO doesn't support strides for LP types // or both tensors have default strides // Strides and positions already initialized } else { // Tensors have default strides const auto& type = get_element_type(); - std::vector strides(shape.size()); - if (!shape.empty()) { - strides[shape.size() - 1] = 1; - } - auto size = shape.size(); - for (size_t i = 1; i < size; i++) { - strides[size - i - 1] = strides[size - i] * shape[size - i]; - } - - ov::Strides default_strides(strides.size()); - for (size_t i = 0; i < strides.size(); ++i) - default_strides[i] = strides[i] * type.size(); + const auto shape_rank = 
shape.size(); + const auto default_strides = default_byte_strides(shape, type); src_strides = get_strides(); dst_strides = dst->get_strides(); @@ -113,8 +95,7 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { // Calculate src and dst shapes bool found_step = false; - for (size_t i = 0; i < shape.size(); i++) { - size_t inverted_idx = shape.size() - i - 1; + for (size_t inverted_idx = shape_rank - 1; inverted_idx < shape_rank; --inverted_idx) { if (!found_step) { if (default_strides[inverted_idx] == src_strides[inverted_idx] && src_strides[inverted_idx] == dst_strides[inverted_idx]) { @@ -134,7 +115,7 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { if (strides_size < default_strides.size()) { strides = default_strides[strides_size]; - dim = get_shape()[strides_size]; + dim = shape[strides_size]; } src_str[strides_size] = strides; dst_str[strides_size] = strides; @@ -151,13 +132,8 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { dst_strides = std::move(dst_str); } - const auto update_index = [](const ov::Shape& pos, const ov::Shape& shape, const ov::Strides& strides) { - size_t offset = 0; - - for (size_t i = 0; i < pos.size(); i++) { - offset += pos[i] * strides[i]; - } - return offset; + const auto update_index = [](const ov::Shape& pos, const ov::Strides& strides) { + return std::inner_product(pos.begin(), pos.end(), strides.begin(), static_cast(0)); }; using copy_function_def = std::function; @@ -190,8 +166,8 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { else finish = true; } - src_idx = update_index(cur_pos, max_pos, src_strides); - dst_idx = update_index(cur_pos, max_pos, dst_strides); + src_idx = update_index(cur_pos, src_strides); + dst_idx = update_index(cur_pos, dst_strides); } } diff --git a/src/core/tests/ov_tensor_test.cpp b/src/core/tests/ov_tensor_test.cpp index a6832f2bb5aff9..8e610196b6e4a1 100644 --- a/src/core/tests/ov_tensor_test.cpp +++ b/src/core/tests/ov_tensor_test.cpp @@ -936,6 +936,18 @@ 
INSTANTIATE_TEST_SUITE_P(copy_tests, TestParams { ov::Shape{}, {}, {1}, {} + }, + TestParams{ + ov::Shape{3,2,2}, {}, + ov::Shape{5}, {} + }, + TestParams{ + ov::Shape{3,2,2}, ov::Strides{64,16,8}, + ov::Shape{5,2}, {} + }, + TestParams{ + ov::Shape{3,2,2}, ov::Strides{64,16,8}, + ov::Shape{3,4,3}, ov::Strides{128,24,8} } ))); diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 9295b388048baa..a39c6b067528fe 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -225,7 +225,8 @@ std::shared_ptr TranslateSession::convert_pytorch_model( } }; - FRONT_END_GENERAL_CHECK(pytorch_model->get_subgraph_size() == 1, "Model should have exactly 1 subgraph."); + FRONT_END_GENERAL_CHECK(pytorch_model->decoder_type_name() != "ts" || pytorch_model->get_subgraph_size() == 1, + "Model should have exactly 1 subgraph for TorchScript."); pytorch_model->visit_subgraph(node_visitor); ResultVector results; @@ -368,10 +369,7 @@ void TranslateSession::encode_tensor_name(Output output, namespace { bool is_number(const std::string& s) { - std::string::const_iterator it = s.begin(); - while (it != s.end() && std::isdigit(*it)) - ++it; - return !s.empty() && it == s.end(); + return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit); } } // namespace diff --git a/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp b/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp index 7bb1d4bf1d3905..3709891a5e3000 100644 --- a/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp +++ b/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp @@ -34,7 +34,7 @@ namespace level_zero { * @brief This class represents an abstraction for NPU plugin remote tensor * which can be shared with user-supplied LevelZero buffer. 
* The plugin object derived from this class can be obtained with ZeroContext::create_tensor() call. - * @note User can obtain LevelZero buffer handle from this class. + * @note User can obtain Level Zero buffer handle from this class. * @ingroup ov_runtime_level_zero_npu_cpp_api */ class ZeroBufferTensor : public RemoteTensor { diff --git a/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp b/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp index f058b5ece45971..0d29d44b6ff170 100644 --- a/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp +++ b/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp @@ -19,24 +19,16 @@ using npu_handle_param = void*; /** * @brief Enum to define the type of the shared memory buffer + * @ingroup ov_runtime_level_zero_npu_cpp_api */ enum class MemType { - L0_INTERNAL_BUF = 0, //!< Internal L0 buffer type allocated by plugin + L0_INTERNAL_BUF = 0, //!< Internal Level Zero buffer type allocated by plugin SHARED_BUF = 1, //!< Shared buffer }; -/** - * @brief Enum to define the type of the tensor - */ -enum class TensorType { - INPUT = 0, //!< Tensor is only used as input - OUTPUT = 1, //!< Tensor is only used as output - BINDED = 2 //!< Tensor could be used as input and output -}; - /** @cond INTERNAL */ -inline std::ostream& operator<<(std::ostream& os, const MemType& share_mem_type) { - switch (share_mem_type) { +inline std::ostream& operator<<(std::ostream& os, const MemType& mem_type) { + switch (mem_type) { case MemType::L0_INTERNAL_BUF: return os << "L0_INTERNAL_BUF"; case MemType::SHARED_BUF: @@ -46,13 +38,13 @@ inline std::ostream& operator<<(std::ostream& os, const MemType& share_mem_type) } } -inline std::istream& operator>>(std::istream& is, MemType& share_mem_type) { +inline std::istream& operator>>(std::istream& is, MemType& mem_type) { std::string str; is >> str; if (str == "L0_INTERNAL_BUF") { - share_mem_type = MemType::L0_INTERNAL_BUF; + 
mem_type = MemType::L0_INTERNAL_BUF; } else if (str == "SHARED_BUF") { - share_mem_type = MemType::SHARED_BUF; + mem_type = MemType::SHARED_BUF; } else { OPENVINO_THROW("Unsupported memory type: ", str); } @@ -63,24 +55,68 @@ inline std::istream& operator>>(std::istream& is, MemType& share_mem_type) { /** * @brief This key identifies type of internal shared memory * in a shared memory tensor parameter map. + * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property mem_type{"MEM_TYPE"}; /** * @brief This key identifies memory handle * in a shared memory tensor parameter map + * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property mem_handle{"MEM_HANDLE"}; /** * @brief This key identifies LevelZero context handle - * in a shared context or shared memory tensor parameter map + * in a shared context parameter map + * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property l0_context{"L0_CONTEXT"}; /** - * @brief This key identifies type of the tensor - * in a shared memory tensor parameter map. 
+ * @brief Enum to define the type of the tensor + * @ingroup ov_runtime_level_zero_npu_cpp_api + */ +enum class TensorType { + INPUT = 0, //!< Tensor is only used as input + OUTPUT = 1, //!< Tensor is only used as output + BINDED = 2 //!< Tensor could be used as input and output +}; + +/** @cond INTERNAL */ +inline std::ostream& operator<<(std::ostream& os, const TensorType& tensor_type) { + switch (tensor_type) { + case TensorType::INPUT: + return os << "INPUT"; + case TensorType::OUTPUT: + return os << "OUTPUT"; + case TensorType::BINDED: + return os << "BINDED"; + default: + OPENVINO_THROW("Unsupported tensor type"); + } +} + +inline std::istream& operator>>(std::istream& is, TensorType& tensor_type) { + std::string str; + is >> str; + if (str == "INPUT") { + tensor_type = TensorType::INPUT; + } else if (str == "OUTPUT") { + tensor_type = TensorType::OUTPUT; + } else if (str == "BINDED") { + tensor_type = TensorType::BINDED; + } else { + OPENVINO_THROW("Unsupported tensor type: ", str); + } + return is; +} +/** @endcond */ + +/** + * @brief This key sets the type of the internal Level Zero buffer + * allocated by the plugin in a shared memory tensor parameter map. 
+ * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property tensor_type{"TENSOR_TYPE"}; diff --git a/src/plugins/auto_batch/src/plugin.cpp b/src/plugins/auto_batch/src/plugin.cpp index de125cb12551af..d97987bea6f39a 100644 --- a/src/plugins/auto_batch/src/plugin.cpp +++ b/src/plugins/auto_batch/src/plugin.cpp @@ -19,9 +19,10 @@ namespace ov { namespace autobatch_plugin { -std::vector supported_configKeys = {ov::device::priorities.name(), - ov::auto_batch_timeout.name(), - ov::enable_profiling.name()}; +std::vector supported_configKeys = { + ov::PropertyName{ov::device::priorities.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::auto_batch_timeout.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::enable_profiling.name(), ov::PropertyMutability::RW}}; inline ov::AnyMap merge_properties(ov::AnyMap config, const ov::AnyMap& user_config) { for (auto&& kvp : user_config) { @@ -82,9 +83,13 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument return {it->second}; } } else if (name == ov::supported_properties.name()) { - return std::vector{ - ov::PropertyName{ov::supported_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::device::full_name.name(), ov::PropertyMutability::RO}}; + std::vector property_name; + property_name.push_back(ov::PropertyName{ov::supported_properties.name(), ov::PropertyMutability::RO}); + property_name.push_back(ov::PropertyName{ov::device::full_name.name(), ov::PropertyMutability::RO}); + for (auto& it : supported_configKeys) { + property_name.push_back(it); + } + return decltype(ov::supported_properties)::value_type(std::move(property_name)); } else if (name == ov::internal::supported_properties.name()) { return decltype(ov::internal::supported_properties)::value_type{}; } else if (name == ov::device::full_name.name()) { @@ -113,6 +118,7 @@ OV_DEFINE_PLUGIN_CREATE_FUNCTION(Plugin, version) Plugin::Plugin() { set_device_name("BATCH"); 
m_plugin_config.insert(ov::auto_batch_timeout(1000)); // default value (ms) + m_plugin_config.insert(ov::enable_profiling(false)); } std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, @@ -132,7 +138,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto full_properties = merge_properties(m_plugin_config, properties); auto device_batch = full_properties.find(ov::device::priorities.name()); if (device_batch == full_properties.end()) { - OPENVINO_THROW("ov::device::priorities key for AUTO NATCH is not set for BATCH device"); + OPENVINO_THROW("ov::device::priorities key for AUTO BATCH is not set for BATCH device"); } auto meta_device = parse_meta_device(device_batch->second.as(), properties); diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index 82e4d3fde3ac14..60351d14b3e89e 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -3,15 +3,17 @@ // #include "dnnl_extension_utils.h" -#include "memory_desc/dnnl_blocked_memory_desc.h" -#include "onednn/iml_type_mapper.h" -#include "utils/general_utils.h" + #include #include #include - #include +#include "cpu_memory.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "onednn/iml_type_mapper.h" +#include "utils/general_utils.h" + using namespace dnnl; namespace ov { @@ -254,5 +256,11 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) { #endif } +std::string DnnlExtensionUtils::computeWeightsStringHash(const std::shared_ptr memory, + const std::shared_ptr dstDesc) { + const auto desc_hash = dnnl::impl::primitive_hashing::get_md_hash(*dstDesc->getDnnlDesc().get()); + return std::to_string(desc_hash) + "_" + std::to_string(reinterpret_cast(memory->getData())); +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.h b/src/plugins/intel_cpu/src/dnnl_extension_utils.h index 
5def48284ab062..cdc6342e8963fd 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.h +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.h @@ -22,6 +22,7 @@ namespace intel_cpu { class DnnlMemoryDesc; class DnnlBlockedMemoryDesc; class Shape; +class IMemory; class DnnlExtensionUtils { public: @@ -101,6 +102,13 @@ class DnnlExtensionUtils { static dnnl_memory_desc_t clone_desc(const_dnnl_memory_desc_t cdesc); static const char* query_pd_info(const_dnnl_primitive_desc_t pd); static bool isUnarySupportedAsPostOp(Algorithm alg); + /** + * @brief Computes weights string hash based on weights memory and requested descriptor + * @param memory Weights memory pointer + * @param dstDesc descriptor defining weights representation after repacking + * @return string hash + */ + static std::string computeWeightsStringHash(const std::shared_ptr memory, const std::shared_ptr dstDesc); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 1dec30581dd71a..3d29173788d658 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -705,23 +705,18 @@ void jit_gelu_tanh_emitter::emit_isa(const std::vector &in_vec_idxs, con h->ld1r(vmm_aux1.s, table_val2("gelu_tanh_sqrt_two_over_pi")); h->fmul(vmm_aux0.s, vmm_aux1.s, vmm_aux2.s); - const bool store_src = vmm_src.getIdx() == vmm_dst.getIdx(); - if (store_src) { - h->mov(vmm_aux2.b16, vmm_src.b16); - } - tanh_emitter->emit_code( { vmm_aux0.getIdx() }, - { vmm_aux0.getIdx() }, + { vmm_aux2.getIdx() }, aux_vec_idxs, aux_gpr_idxs); // compute 0.5 * x * (1 + tanh(G(x))) h->ld1r(vmm_aux1.s, table_val2("one")); - h->fadd(vmm_aux0.s, vmm_aux1.s, vmm_aux0.s); + h->fadd(vmm_aux0.s, vmm_aux1.s, vmm_aux2.s); h->ld1r(vmm_aux1.s, table_val2("half")); h->fmul(vmm_aux0.s, vmm_aux1.s, 
vmm_aux0.s); - h->fmul(vmm_dst.s, store_src ? vmm_aux2.s : vmm_src.s, vmm_aux0.s); + h->fmul(vmm_dst.s, vmm_src.s, vmm_aux0.s); } void jit_gelu_tanh_emitter::register_table_entries() { @@ -1219,6 +1214,8 @@ jit_mod_emitter::jit_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, size_t jit_mod_emitter::get_inputs_count() const { return 2; } +size_t jit_mod_emitter::get_aux_vecs_count() const { return 1; } + void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1233,14 +1230,15 @@ void jit_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; - TReg divend = TReg(in_vec_idxs[0]); + TReg dividend = TReg(in_vec_idxs[0]); TReg divisor = TReg(in_vec_idxs[1]); TReg r = TReg(out_vec_idxs[0]); + TReg aux = TReg(aux_vec_idxs[0]); - h->uni_fdiv(r.s, divend.s, divisor.s); - h->frintz(r.s, r.s); - h->uni_fmul(r.s, r.s, divisor.s); - h->uni_fsub(r.s, divend.s, r.s); + h->fdiv(aux.s, dividend.s, divisor.s); + h->frintz(aux.s, aux.s); + h->fmul(aux.s, aux.s, divisor.s); + h->fsub(r.s, dividend.s, aux.s); } std::set> jit_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { @@ -1874,7 +1872,7 @@ void jit_tanh_emitter::emit_isa(const std::vector &in_vec_idxs, const st TReg src = TReg(in_vec_idxs[0]); TReg dst = TReg(out_vec_idxs[0]); - TReg aux = TReg(aux_vec_idxs.back()); + TReg aux = TReg(aux_vec_idxs[0]); h->ld1r(aux.s, table_val2("two")); h->uni_fmul(aux.s, src.s, aux.s); diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index b71fb0a67f2a19..0152a5bd3d99e1 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp 
@@ -477,6 +477,8 @@ class jit_mod_emitter : public jit_emitter { size_t get_inputs_count() const override; + size_t get_aux_vecs_count() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp index 1f6bd487032730..925a6d28697d41 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp @@ -14,20 +14,27 @@ namespace intel_cpu { CPURuntimeConfigurator::CPURuntimeConfigurator() : ov::snippets::RuntimeConfigurator(std::make_shared()) { } -void CPURuntimeConfigurator::update(const std::shared_ptr& linear_ir) { - RuntimeConfigurator::update(linear_ir); - +void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRPtr& linear_ir) { if (linear_ir->is_dynamic()) { - get_kernel_executor_table()->update_state(); + update_loop_info(linear_ir); update_loop_args(linear_ir); + // Update KernelExecutor Table should be before `update_buffer_scratchpad_size` + // because `ComputeAllocationSize` depends on subtensors which are updated in the table + get_kernel_executor_table()->update_state(linear_ir); + update_buffer_scratchpad_size(linear_ir); } + + m_config->master_shape = linear_ir->get_master_shape(); + + update_data_offsets(); + update_latest_shapes(); } -void CPURuntimeConfigurator::init_tensor_rank(const std::shared_ptr& linear_ir) const { +void CPURuntimeConfigurator::init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const { m_config->tensor_rank = std::max(linear_ir->get_master_shape().size(), rank6D); } -void CPURuntimeConfigurator::update_loop_args(const std::shared_ptr& linear_ir) const { +void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const { const auto& cpu_config = 
ov::as_type_ptr(m_config); OPENVINO_ASSERT(cpu_config, "CPURuntimeConfigurator expects CPURuntimeConfig"); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp index 6b3a54652097ae..f1a21e5982aa1c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp @@ -29,17 +29,17 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator { * @brief Update RuntimeConfig based on LinearIR * @param linear_ir LinearIR */ - void update(const std::shared_ptr& linear_ir) override; + void update(const ov::snippets::lowered::LinearIRPtr& linear_ir) override; /** * @brief Initializes tensor rank of config * @param linear_ir LinearIR */ - void init_tensor_rank(const std::shared_ptr& linear_ir) const override; + void init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const override; /** * @brief Calculate Loop parameters of Loop emitters and update these values in CPURuntimeConfig * @param linear_ir LinearIR */ - void update_loop_args(const std::shared_ptr& linear_ir) const; + void update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const; const size_t rank6D = 6; }; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp index 6b99097872db37..cb6dfeb741109a 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp @@ -41,7 +41,8 @@ void jit_loop_begin_emitter::validate_arguments(const std::vector &in, c // Note: the only expected output is work amount register (communicated to jit_loop_end_emitter) OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Invalid outputs size: expected 1 got " + std::to_string(out.size())); 
OV_CPU_JIT_EMITTER_ASSERT(loop_begin_label != nullptr && loop_end_label != nullptr, "has not inited labels!"); - OV_CPU_JIT_EMITTER_ASSERT(implication(is_work_amount_dynamic, !evaluate_once), "with dynamic work_amount cannot evaluate once!"); + OV_CPU_JIT_EMITTER_ASSERT(!snippets::utils::is_dynamic_value(wa_increment) || evaluate_once, + "loop increment might be dynamic only if loop evaluates once!"); } void jit_loop_begin_emitter::emit_code(const std::vector &in, const std::vector &out, @@ -52,7 +53,8 @@ void jit_loop_begin_emitter::emit_code(const std::vector &in, const std: void jit_loop_begin_emitter::emit_impl(const std::vector& in, const std::vector& out) const { // If the loop evaulate once, we can skip loop begin code emission - if (evaluate_once) + // If work_amount is dynamic, we should get runtime `work_amount` - it might be `zero` and we should skip loop evaluation + if (evaluate_once && !is_work_amount_dynamic) return; Reg64 reg_work_amount = Reg64(static_cast(out.back())); @@ -124,7 +126,8 @@ void jit_loop_end_emitter::validate_arguments(const std::vector &in, con "Invalid finalization_offsets size: expected: ", io_size, " got ", finalization_offsets.size()); OV_CPU_JIT_EMITTER_ASSERT(data_sizes.size() == io_size, "Invalid data_sizes size: expected: ", io_size, " got ", data_sizes.size()); OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "has not inited labels!"); - OV_CPU_JIT_EMITTER_ASSERT(implication(are_ptr_shifts_dynamic, !evaluate_once), "with dynamic data pointer shifts cannot evaluate once!"); + OV_CPU_JIT_EMITTER_ASSERT(!snippets::utils::is_dynamic_value(wa_increment) || evaluate_once, + "loop increment might be dynamic only if loop evaluates once!"); } void jit_loop_end_emitter::emit_code(const std::vector &in, const std::vector &out, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp index 
6898fd18b587cd..e538c3baef28bb 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp @@ -4,6 +4,8 @@ #include "brgemm.hpp" +#include "snippets/lowered/loop_manager.hpp" + #include #include "common/utils.hpp" #include "dnnl_extension_utils.h" @@ -18,7 +20,7 @@ using namespace dnnl::impl::cpu::x64; namespace { size_t init_hash(dnnl_data_type_t dt_in0, dnnl_data_type_t dt_in1, float beta, bool is_with_amx, - bool is_with_comp, dnnl::impl::cpu::x64::cpu_isa_t isa) { + bool is_with_comp, dnnl::impl::cpu::x64::cpu_isa_t isa) { size_t seed = 0; #define HASH(X) seed = hash_combine(seed, X) HASH(dt_in0); HASH(dt_in1); @@ -41,7 +43,7 @@ BrgemmKernelConfig::BrgemmKernelConfig(const element::Type& in0_dtype, const ele } bool BrgemmKernelConfig::is_completed() const { - return !utils::one_of(0, m_M, m_N, m_K, m_LDA, m_LDB, m_LDC); + return !utils::one_of(0, m_M, m_N, m_K, m_LDA, m_LDB, m_LDC) || is_empty(); } bool BrgemmKernelConfig::operator==(const BrgemmKernelConfig& rhs) const { @@ -54,11 +56,22 @@ bool BrgemmKernelConfig::operator==(const BrgemmKernelConfig& rhs) const { } void BrgemmKernelConfig::update(dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, dnnl_dim_t LDA, dnnl_dim_t LDB, dnnl_dim_t LDC) { - m_M = M; m_N = N; m_K = K; - m_LDA = LDA; m_LDB = LDB; m_LDC = LDC; + // If M is zero, it means that Brgemm won't be executed (in Loop with work_amount = 0, for example) + // To process this case, we have to make this Config as empty (nullify runtime parameters) + if (utils::one_of(0, M, N, K)) { + m_M = 0; m_N = 0; m_K = 0; + m_LDA = 0; m_LDB = 0; m_LDC = 0; + } else { + m_M = M; m_N = N; m_K = K; + m_LDA = LDA; m_LDB = LDB; m_LDC = LDC; + } m_hash = compute_hash(); } +bool BrgemmKernelConfig::is_empty() const { + return everyone_is(0, m_M, m_N, m_K, m_LDA, m_LDB, m_LDC); +} + BrgemmKernelConfig::operator amx_tile_config_t() const { amx_tile_config_t res; res.M = 
m_M; res.N = m_N; res.K = m_K; @@ -115,6 +128,12 @@ BrgemmKernelExecutor::BrgemmKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kern std::shared_ptr BrgemmKernelExecutor::compile_kernel(const BrgemmKernelConfig& config) const { + std::shared_ptr compiled_kernel = std::make_shared(); + + // Brgemm is not executable - nothing to compile + if (config.is_empty()) + return compiled_kernel; + cpu::x64::brgemm_t desc; auto status = brgemm_desc_init(&desc, config.get_isa(), cpu::x64::brgemm_strd, config.get_dt_in0(), config.get_dt_in1(), @@ -122,10 +141,8 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const config.get_beta(), config.get_LDA(), config.get_LDB(), config.get_LDC(), config.get_M(), config.get_N(), config.get_K(), nullptr); - - auto compiled_kernel = std::make_shared(); - OV_CPU_JIT_EMITTER_ASSERT(status == dnnl_success, "Cannot initialize brgemm descriptor due to invalid params"); + if (config.is_with_amx()) { status = brgemm_init_tiles(desc, compiled_kernel->palette); OV_CPU_JIT_EMITTER_ASSERT(status == dnnl_success, "Cannot initialize brgemm tiles due to invalid params"); @@ -138,31 +155,49 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const return compiled_kernel; } -void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::ExpressionPtr& expr, BrgemmKernelConfig& config) const { - auto get_projected_input_subtensor = [](const snippets::lowered::PortDescriptorPtr& desc) { - // Note: for output shape you will need get_preordered_vdims() - auto shape = snippets::utils::get_planar_vdims(desc->get_shape(), desc->get_layout()); - auto subtensor = desc->get_subtensor(); - OV_CPU_JIT_EMITTER_ASSERT(subtensor.size() <= shape.size() && subtensor.size() == 2, - "Invalid subtensor + shape combination"); - auto shape_it = shape.rbegin(); - for (auto sub_it = subtensor.rbegin(); sub_it != subtensor.rend(); sub_it++, shape_it++) { - *sub_it = std::min(*sub_it, *shape_it); - } - return subtensor; - }; +void 
BrgemmKernelExecutor::update_config(const ov::snippets::lowered::ExpressionPtr& expr, + const ov::snippets::lowered::LinearIRPtr& linear_ir, + BrgemmKernelConfig& config) const { const auto& input_pds = expr->get_input_port_descriptors(); const auto& output_pds = expr->get_output_port_descriptors(); OV_CPU_JIT_EMITTER_ASSERT((input_pds.size() == 2 || input_pds.size() == 3) && output_pds.size() == 1, "Invalid number of in/out port descriptors"); - // Update runtime-defined config fields: - // Matrix A (first input) + + const auto in0_shape = snippets::utils::get_planar_vdims(input_pds[0]->get_shape(), input_pds[0]->get_layout()); + const auto in1_shape = snippets::utils::get_planar_vdims(input_pds[1]->get_shape(), input_pds[1]->get_layout()); + auto in0_subtensor = input_pds[0]->get_subtensor(); + auto in1_subtensor = input_pds[1]->get_subtensor(); + + auto M = *++in0_subtensor.rbegin(); + auto K = *in0_subtensor.rbegin(); + auto N = *in1_subtensor.rbegin(); + + if (ov::snippets::utils::is_full_dim_value(M)) { + M = *++in0_shape.rbegin(); + } else { + const auto& loop_ids = expr->get_loop_ids(); + OPENVINO_ASSERT(!loop_ids.empty(), "Loop by dimension M is missed"); + // TODO [146125]: Loop by M is first one in `loop_ids` + const auto& expanded_loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_ids.front()); + M = expanded_loop_info->get_increment(); + input_pds[0]->set_subtensor_dim(1, M); + output_pds[0]->set_subtensor_dim(1, M); + } + + if (ov::snippets::utils::is_full_dim_value(K)) { + K = *in0_shape.rbegin(); + } else if (ov::snippets::utils::is_dynamic_value(K)) { + OPENVINO_THROW("Dynamic K is not supported"); + } + + if (ov::snippets::utils::is_full_dim_value(N)) { + N = *in1_shape.rbegin(); + } else if (ov::snippets::utils::is_dynamic_value(N)) { + OPENVINO_THROW("Dynamic N is not supported"); + } + const auto LDA = DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(0))); - const auto& in0_subtensor = 
get_projected_input_subtensor(input_pds[0]); - const auto K = DIM_CAST(*in0_subtensor.rbegin()); - const auto M = DIM_CAST(*++in0_subtensor.rbegin()); - // Matrix B (second input) - // Non float input 1 => with data repacking + const auto LDC = DIM_CAST(snippets::utils::get_dim_stride(expr->get_output_port(0))); auto LDB = DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(1))); const auto& brgemm_node = as_type_ptr(expr->get_node()); @@ -172,10 +207,8 @@ void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::Expression OV_CPU_JIT_EMITTER_ASSERT(!repacking_buffer_shape.empty(), "Repacking buffer shape mustn't be empty"); LDB = DIM_CAST(repacking_buffer_shape.back()); } - const auto N = DIM_CAST(*get_projected_input_subtensor(input_pds[1]).rbegin()); - // Matrix C (output) - const auto LDC = DIM_CAST(snippets::utils::get_dim_stride(expr->get_output_port(0))); - config.update(M, N, K, LDA, LDB, LDC); + + config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), LDA, LDB, LDC); } void BrgemmKernelExecutor::execute(const BrgemmKernelExecutor* executor, call_args* args) { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp index b0dd9c465b66de..4dd52e21ca2dfd 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp @@ -24,6 +24,7 @@ struct BrgemmKernelConfig : public snippets::KernelExecutorBase::GenericConfig { return std::unique_ptr( new BrgemmKernelConfig(*this)); } void update(dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, dnnl_dim_t LDA, dnnl_dim_t LDB, dnnl_dim_t LDC); + bool is_empty() const; dnnl_data_type_t get_dt_in0() const { return m_static_params->dt_in0; } dnnl_data_type_t get_dt_in1() const { return m_static_params->dt_in1; } @@ -95,7 +96,9 @@ class BrgemmKernelExecutor : public CPUKernelExecutor 
compile_kernel(const BrgemmKernelConfig& c) const override; - void update_config(const ov::snippets::lowered::ExpressionPtr& expr, BrgemmKernelConfig& config) const override; + void update_config(const ov::snippets::lowered::ExpressionPtr& expr, + const ov::snippets::lowered::LinearIRPtr& linear_ir, + BrgemmKernelConfig& config) const override; }; #define GET_OFF_BRGEMM_ARGS(field) offsetof(BrgemmKernelExecutor::call_args, field) diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp index 91c95f0a478d3c..70ddbb3d79ee21 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp @@ -48,7 +48,7 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, io_port_descriptors.resize(num_kernel_args); // Note: this is needed mostly for Reduce operations, since they allow the last subternsor dim to be FULL_DIM; auto replace_full_dim = [](size_t dim, size_t replace_dim) { - if (dim == snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM) + if (ov::snippets::utils::is_full_dim_value(dim)) return replace_dim; return dim; }; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index dc24c611861a16..41c3011f8707ec 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -831,16 +831,8 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) { MemoryPtr ptr; auto weightCache = context->getWeightsCache(); if (weightCache != nullptr && memory::format_kind::blocked == intDesc->getDnnlDesc().get_format_kind()) { - const auto& format = intDesc->serializeFormat(); - const uint64_t data_hash = - weightCache->GetHashFunc().hash(static_cast(internalBlob->getData()), - internalBlob->getSize()); - - const std::string string_hash = name + "_" + std::to_string(indx) - + "_" + format - + "_" + 
std::to_string(internalBlob->getSize()) - + "_" + std::to_string(data_hash); - + const auto string_hash = + name + "_" + std::to_string(indx) + "_" + DnnlExtensionUtils::computeWeightsStringHash(internalBlob, intDesc); ptr = *weightCache->findOrCreate(string_hash, create); } else { ptr = create(); @@ -905,10 +897,7 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryD auto weightCache = context->getWeightsCache(); if (weightCache != nullptr) { - const std::string string_hash = getName() + "_" + format - + "_" + std::to_string(edgeMem->getSize()) - + "_" + std::to_string(*edgeMem->getDataAs()); - + const auto string_hash = DnnlExtensionUtils::computeWeightsStringHash(edgeMem, dstWeightDesc); ptr = *weightCache->findOrCreate(string_hash, create); } else { ptr = create(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp index d9cbd05a847231..c801eca5bbe13a 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp @@ -4,12 +4,15 @@ #include "nodes/executors/dnnl/dnnl_utils.hpp" +#include #include #include "cpu_memory.h" #include "memory_desc/dnnl_memory_desc.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/executors/executor.hpp" #include "nodes/reorder.h" +#include "utils/cpu_utils.hpp" namespace ov { namespace intel_cpu { @@ -86,8 +89,7 @@ MemoryPtr prepareWeightsMemory(const DnnlMemoryDescPtr srcWeightDesc, MemoryPtr ptr; if (globalWeightCache && dnnl::memory::format_kind::blocked == dstWeightDesc->getDnnlDesc().get_format_kind()) { - const std::string string_hash = format + "_" + std::to_string(weightsMem->getSize()) + "_" + - std::to_string(reinterpret_cast(weightsMem->getData())); + const auto string_hash = DnnlExtensionUtils::computeWeightsStringHash(weightsMem, dstWeightDesc); ptr = *globalWeightCache->findOrCreate(string_hash, create); } 
else { ptr = create(); diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 76e41db1cd06c0..da3dcafa4750ef 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -25,6 +25,8 @@ #include "utils/debug_capabilities.h" #include "utils/general_utils.h" +#include "fake_quantize.h" + using namespace dnnl; using namespace ov::element; @@ -94,6 +96,25 @@ bool FullyConnected::canFuse(const NodePtr& node) const { #if defined(OV_CPU_WITH_SHL) return false; #endif + if (node->getType() == Type::FakeQuantize) { + auto* fq = dynamic_cast(node.get()); + if (fq->getBroadcastingPolicy() != FakeQuantize::BroadcastingPolicy::PerTensor) { + const auto& dstShape = getOutputShapeAtPort(0); + auto dataRanks = dstShape.getRank(); + // only per-OC or per-Tensor fakequantize can be postOps + if (fq->getAxis() != dataRanks - 1) { + DEBUG_LOG("reject FakeQuantize ", + fq->getName(), + "(axis=", + fq->getAxis(), + ") from fusing into ", + getName(), + " with dst shape ", + dstShape); + return false; + } + } + } return canFuseSimpleOperation(node); } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp index bd05801c139dc8..34c00a527d2ce7 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp @@ -20,9 +20,11 @@ namespace XARCH { // avx512/avx2 register length in byte static constexpr size_t vec_len_avx512 = 64lu; static constexpr size_t vec_len_avx2 = 32lu; +static constexpr size_t vec_len_neon = 16lu; // avx512/avx2 register length in float static constexpr size_t vec_len_f32_avx512 = vec_len_avx512 / sizeof(float); static constexpr size_t vec_len_f32_avx2 = vec_len_avx2 / sizeof(float); +static constexpr size_t vec_len_f32_neon = vec_len_neon / sizeof(float); #ifdef HAVE_AVX512F 
inline __m512 cvt_bf16_to_fp32(const __m256i src) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index e4648ece365e9a..5177f4013319e6 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -13,12 +13,17 @@ # include #endif + #include "openvino/core/type/bfloat16.hpp" #include "openvino/core/parallel.hpp" #include "mha_single_token.hpp" #include "common.hpp" #include "softmax_kernel.hpp" +#if defined(OPENVINO_ARCH_ARM64) +# include +#endif + namespace ov { namespace Extensions { namespace Cpu { @@ -53,6 +58,13 @@ void cvt_copy(TA* dst, TB* src, size_t n) { auto vb = mm256_uni_loadu_ps(src + i); mm256_uni_storeu_ps(dst + i, vb); } +#elif defined(OPENVINO_ARCH_ARM64) + int vec_len_f32_neon = 4; + auto _dst = reinterpret_cast(dst); + for (; i + vec_len_f32_neon <= n; i += vec_len_f32_neon) { + float32x4_t vb1 = vld1q_f32(src + i); + vst1q_f32(_dst + i, vb1); + } #endif for (; i < n; i++) { dst[i] = src[i]; @@ -78,6 +90,15 @@ static void attn_acc_value(float* out, float weight, T* v, size_t S, float* scal v_out = _mm256_fmadd_ps(attn_w_vec_fp32, v_value, v_out); mm256_uni_storeu_ps(out + i, v_out); } +#elif defined(OPENVINO_ARCH_ARM64) + float32x4_t attn_w_vec_fp32 = vdupq_n_f32(weight); + auto _v = reinterpret_cast(v); + for (; i + vec_len_f32_neon <= S; i += vec_len_f32_neon) { + float32x4_t v_value = vld1q_f32(_v + i); + float32x4_t v_out = vld1q_f32(out + i); + v_out = vmlaq_f32(v_out, attn_w_vec_fp32, v_value); + vst1q_f32(out + i, v_out); + } #endif for (; i < S; i++) { out[i] += weight * v[i]; @@ -308,6 +329,47 @@ static float sum_q_head(T* a, size_t n) { vsum0 = _mm256_add_ps(vsum0, vsum2); hsum(vsum0); sum = _mm256_cvtss_f32(vsum0); +#elif defined(OPENVINO_ARCH_ARM64) + size_t vec_len_f32_neon = 4; + float32x4_t vsum0 = 
vdupq_n_f32(0.0f); + float32x4_t vsum1 = vdupq_n_f32(0.0f); + float32x4_t vsum2 = vdupq_n_f32(0.0f); + float32x4_t vsum3 = vdupq_n_f32(0.0f); + + for (; i + 4 * vec_len_f32_neon <= n; i += vec_len_f32_neon * 4) { + float32x4_t va0 = vld1q_f32(a + i); + float32x4_t va1 = vld1q_f32(a + i + vec_len_f32_neon); + float32x4_t va2 = vld1q_f32(a + i + vec_len_f32_neon * 2); + float32x4_t va3 = vld1q_f32(a + i + vec_len_f32_neon * 3); + + vsum0 = vaddq_f32(va0, vsum0); + vsum1 = vaddq_f32(va1, vsum1); + vsum2 = vaddq_f32(va2, vsum2); + vsum3 = vaddq_f32(va3, vsum3); + } + if (i + 2 * vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(a + i); + float32x4_t va1 = vld1q_f32(a + i + vec_len_f32_neon); + + vsum0 = vaddq_f32(va0, vsum0); + vsum1 = vaddq_f32(va1, vsum1); + i += 2 * vec_len_f32_neon; + } + if (i + vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(a + i); + vsum0 = vaddq_f32(va0, vsum0); + i += vec_len_f32_neon; + } + + vsum0 = vaddq_f32(vsum0, vsum1); + vsum2 = vaddq_f32(vsum2, vsum3); + vsum0 = vaddq_f32(vsum0, vsum2); + + float32x2_t sum_low = vget_low_f32(vsum0); + float32x2_t sum_high = vget_high_f32(vsum0); + sum_low = vadd_f32(sum_low, sum_high); + sum_low = vpadd_f32(sum_low, sum_low); + sum = vget_lane_f32(sum_low, 0); #endif for (; i < n; i++) { @@ -406,7 +468,59 @@ static float dot_product(TA* a, TB* b, size_t n, float* scale, float* zp, float* vsum0 = _mm256_add_ps(vsum0, vsum2); hsum(vsum0); sum = _mm256_cvtss_f32(vsum0); + +#elif defined(OPENVINO_ARCH_ARM64) + float32x4_t vsum0 = vdupq_n_f32(0.0f); + float32x4_t vsum1 = vdupq_n_f32(0.0f); + float32x4_t vsum2 = vdupq_n_f32(0.0f); + float32x4_t vsum3 = vdupq_n_f32(0.0f); + + auto _a = reinterpret_cast(a); + auto _b = reinterpret_cast(b); + + for (; i + 4 * vec_len_f32_neon <= n; i += vec_len_f32_neon * 4) { + float32x4_t va0 = vld1q_f32(_a + i); + float32x4_t va1 = vld1q_f32(_a + i + vec_len_f32_neon); + float32x4_t va2 = vld1q_f32(_a + i + vec_len_f32_neon * 2); + float32x4_t va3 = vld1q_f32(_a 
+ i + vec_len_f32_neon * 3); + + float32x4_t vb0 = vld1q_f32(_b + i); + float32x4_t vb1 = vld1q_f32(_b + i + vec_len_f32_neon); + float32x4_t vb2 = vld1q_f32(_b + i + vec_len_f32_neon * 2); + float32x4_t vb3 = vld1q_f32(_b + i + vec_len_f32_neon * 3); + + vsum0 = vmlaq_f32(vsum0, va0, vb0); + vsum1 = vmlaq_f32(vsum1, va1, vb1); + vsum2 = vmlaq_f32(vsum2, va2, vb2); + vsum3 = vmlaq_f32(vsum3, va3, vb3); + } + if (i + 2 * vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(_a + i); + float32x4_t va1 = vld1q_f32(_a + i + vec_len_f32_neon); + + float32x4_t vb0 = vld1q_f32(_b + i); + float32x4_t vb1 = vld1q_f32(_b + i + vec_len_f32_neon); + + vsum0 = vmlaq_f32(vsum0, va0, vb0); + vsum1 = vmlaq_f32(vsum1, va1, vb1); + i += 2 * vec_len_f32_neon; + } + if (i + vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(_a + i); + float32x4_t vb0 = vld1q_f32(_b + i); + vsum0 = vmlaq_f32(vsum0, va0, vb0); + i += vec_len_f32_neon; + } + + vsum0 = vaddq_f32(vsum0, vsum1); + vsum2 = vaddq_f32(vsum2, vsum3); + vsum0 = vaddq_f32(vsum0, vsum2); + + float32x2_t temp_sum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); + temp_sum = vpadd_f32(temp_sum, temp_sum); + sum = vget_lane_f32(temp_sum, 0); #endif + for (; i < n; i++) { sum += a[i] * b[i]; } @@ -593,6 +707,18 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str } mm256_uni_storeu_ps(dst + i, result_vec_fp32); } +#elif defined(OPENVINO_ARCH_ARM64) + auto _dst = reinterpret_cast(dst); + for (; i + vec_len_f32_neon <= S; i += vec_len_f32_neon) { + auto* src = temp + i; + auto result_vec_fp32 = vdupq_n_f32(0.0f); + for (size_t m = 0; m < M; m++) { + auto o_vec_fp32 = vld1q_f32(src); + result_vec_fp32 = vaddq_f32(result_vec_fp32, o_vec_fp32); + src += temp_stride; + } + vst1q_f32(_dst + i, result_vec_fp32); + } #endif for (; i < S; i++) { auto* src = temp + i; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp 
b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 3c9bfcc5ea064b..d71faef96923d0 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -31,7 +31,7 @@ using namespace snippets::lowered; namespace { std::vector make_subtensor(const ov::Shape& tensor) { - return std::vector(std::min(tensor.size(), size_t(2)), PortDescriptor::ServiceDimensions::FULL_DIM); + return std::vector(std::min(tensor.size(), size_t(2)), ov::snippets::utils::get_full_dim_value()); } template void set_full_port_desc(const T& port) { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp index 044a1f724e78c3..3c8e4caf00c9b0 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp @@ -9,6 +9,7 @@ #include "snippets/lowered/linear_ir.hpp" #include "snippets/lowered/loop_manager.hpp" #include "snippets/lowered/pass/pass.hpp" +#include "snippets/lowered/pass/propagate_subtensors.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/utils/utils.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -56,6 +57,15 @@ LinearIR::constExprIt BrgemmBlocking::get_loop_begin_pos(LinearIR& linear_ir, co return loop_begin_it; } +snippets::lowered::SpecificIterationHandlers BrgemmBlocking::get_default_blocking_loop_handlers(size_t work_amount, size_t block_size) { + SpecificIterationHandlers handlers; + const auto tail_size = snippets::utils::is_dynamic_value(work_amount) ? 
snippets::utils::get_dynamic_value() : work_amount % block_size; + if (tail_size != 0) + handlers.register_pass(tail_size); + handlers.register_pass(); + return handlers; +} + bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmBlocking") const auto& loop_manager = linear_ir.get_loop_manager(); @@ -107,16 +117,24 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea const auto block_size_n = snippets::utils::is_dynamic_value(n) ? brgemm->get_n_block_size() : std::min(brgemm->get_n_block_size(), n); const auto block_size_k = snippets::utils::is_dynamic_value(k) ? brgemm->get_k_block_size() : std::min(brgemm->get_k_block_size(), k); - *++in_0_subtensor.rbegin() = block_size_m; - *++out_subtensor.rbegin() = block_size_m; - *in_1_subtensor.rbegin() = block_size_n; - *out_subtensor.rbegin() = block_size_n; - *in_0_subtensor.rbegin() = block_size_k; - *++in_1_subtensor.rbegin() = block_size_k; + const bool m_blocking = block_size_m != m; + const bool n_blocking = block_size_n != n; + const bool k_blocking = block_size_k != k; - brgemm_expr->get_input_port_descriptor(0)->set_subtensor(in_0_subtensor); - brgemm_expr->get_input_port_descriptor(1)->set_subtensor(in_1_subtensor); - brgemm_expr->get_output_port_descriptor(0)->set_subtensor(out_subtensor); + // If block_size is dynamic, it means that Brgemm will process full tensor: + // subtensor[i] = FULL_DIM as by default + if (!snippets::utils::is_dynamic_value(block_size_m) && m_blocking) { + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_dim(1, block_size_m); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor_dim(1, block_size_m); + } + if (!snippets::utils::is_dynamic_value(block_size_n) && n_blocking) { + brgemm_expr->get_input_port_descriptor(1)->set_subtensor_dim(0, block_size_n); + 
brgemm_expr->get_output_port_descriptor(0)->set_subtensor_dim(0, block_size_n); + } + if (!snippets::utils::is_dynamic_value(block_size_k) && k_blocking) { + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_dim(0, block_size_k); + brgemm_expr->get_input_port_descriptor(1)->set_subtensor_dim(1, block_size_k); + } const bool need_brgemm_copy_b = brgemm_cpu && with_repacking(brgemm_cpu->get_type()); ov::snippets::lowered::ExpressionPtr copy_b_expr = nullptr; @@ -154,7 +172,9 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea if (!include_repacking && brgemm_cpu && with_compensations(brgemm_cpu->get_type())) entries.emplace_back(brgemm_expr->get_input_port(2), false); const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), true)}; - loop_manager->mark_loop(loop_begin_it, loop_end_it, m, block_size_m, 1, entries, exits); + + const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, m, block_size_m, 1, entries, exits, false); + loop_manager->get_loop_info(id)->set_handlers(get_default_blocking_loop_handlers(m, block_size_m)); }; auto mark_n_blocking = [&]() { @@ -165,7 +185,9 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(need_brgemm_copy_b ? copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1), true)}; const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), true)}; - loop_manager->mark_loop(loop_begin_it, loop_end_it, n, block_size_n, 0, entries, exits); + + const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, n, block_size_n, 0, entries, exits, false); + loop_manager->get_loop_info(id)->set_handlers(get_default_blocking_loop_handlers(n, block_size_n)); }; auto mark_k_blocking = [&]() { @@ -176,14 +198,14 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea LoopPort(brgemm_expr->get_input_port(0), true, 0), LoopPort(need_brgemm_copy_b ? 
copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1), true, 1)}; const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), false)}; - const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, k, block_size_k, entries, exits); - const auto& loop_info = loop_manager->get_loop_info(id); - loop_info->register_pass_to_handler(0.f); + + auto handlers = get_default_blocking_loop_handlers(k, block_size_k); + handlers.register_pass(0.f); + + const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, k, block_size_k, entries, exits, false); + loop_manager->get_loop_info(id)->set_handlers(handlers); }; - const bool k_blocking = block_size_k != k; - const bool n_blocking = block_size_n != n; - const bool m_blocking = block_size_m != m; // It is not necessary to include copyB in loop by M if there are no blocking by KN const bool include_repacking_in_loop = k_blocking || n_blocking; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp index cdc2d05cffd1e5..4d29267f034fc9 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp @@ -5,6 +5,7 @@ #pragma once #include "snippets/lowered/pass/pass.hpp" +#include "snippets/lowered/specific_loop_iter_handlers.hpp" namespace ov { namespace intel_cpu { @@ -24,6 +25,8 @@ class BrgemmBlocking : public snippets::lowered::pass::RangedPass { snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) override; + static snippets::lowered::SpecificIterationHandlers get_default_blocking_loop_handlers(size_t work_amount, size_t block_size); + private: static snippets::lowered::LinearIR::constExprIt move_new_memory_buffer(snippets::lowered::LinearIR& linear_ir, const 
snippets::lowered::LinearIR::constExprIt& brgemm_it); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp index d5e96b2a7339ba..a8281ad1d02da6 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp @@ -4,6 +4,7 @@ #include "cpu_iter_handlers.hpp" +#include "snippets/op/loop.hpp" #include "snippets/lowered/loop_manager.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -34,6 +35,19 @@ std::shared_ptr SetBrgemmBeta::merge(const st return nullptr; return merged_pass; } + +bool SetEvaluateOnce::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { + const auto& loop_end = ov::as_type_ptr(end->get()->get_node()); + OPENVINO_ASSERT(loop_end, "SetEvaluateOnce expected LoopEnd node in iterator `end`."); + const auto& loop_info = linear_ir.get_loop_manager()->get_loop_info(loop_end->get_id()); + loop_info->set_evaluate_once(true); + return true; +} + +std::shared_ptr SetEvaluateOnce::merge(const std::shared_ptr& other) { + return !other || ov::is_type(other) ? 
std::make_shared() : nullptr; +} + } // namespace pass } // namespace intel_cpu } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp index 5da97e29796f70..24697c2f50f6a6 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp @@ -25,7 +25,23 @@ class SetBrgemmBeta : public snippets::lowered::pass::RangedPass { std::shared_ptr merge(const std::shared_ptr& other) override; private: - float m_beta; + float m_beta = 0; +}; + +/** + * @interface SetEvaluateOnce + * @brief The pass set `evaluate once = true` only to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end`. + * The pointer arithmetic should be updated in the separate optimization `OptimizeLoopSingleEvaluation` + * @ingroup snippets + */ +class SetEvaluateOnce : public snippets::lowered::pass::RangedPass { +public: + SetEvaluateOnce() = default; + OPENVINO_RTTI("SetEvaluateOnce", "RangedPass") + bool run(snippets::lowered::LinearIR& linear_ir, + snippets::lowered::LinearIR::constExprIt begin, + snippets::lowered::LinearIR::constExprIt end) override; + std::shared_ptr merge(const std::shared_ptr& other) override; }; } // namespace pass } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp index b3c04fb7833db9..da83038f5455f8 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp @@ -3,6 +3,7 @@ // #include "snippets/itt.hpp" +#include "snippets/utils/utils.hpp" #include 
"eltwise_to_eltwise_tpp.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -40,14 +41,12 @@ EltwiseToEltwiseTPP::EltwiseToEltwiseTPP() { OPENVINO_ASSERT(tpp_eltwise, "Failed to create TPP node"); const size_t M_block = 32; - const size_t N_block = ov::is_type(node) ? - snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM : - 64; + const size_t N_block = ov::is_type(node) ? ov::snippets::utils::get_full_dim_value() : 64; ov::replace_node_update_name(node, tpp_eltwise); for (size_t i = 0; i < node->get_input_size(); i++) - snippets::lowered::set_port_desc(tpp_eltwise->input(i), {M_block, N_block}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->input(i), {M_block, N_block}); - snippets::lowered::set_port_desc(tpp_eltwise->output(0), {M_block, N_block}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->output(0), {M_block, N_block}); return true; }; diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp index a420ed2cbfea22..4f38eddc2bde0f 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp @@ -74,7 +74,7 @@ size_t get_leading_dim(ExpressionPort port, const snippets::lowered::LoopManager bool full_dim_substituted = false; for (size_t i = 1; i <= subtensor.size(); i++) { const auto idx = subtensor.size() - i; - if (subtensor[idx] == snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM) { + if (ov::snippets::utils::is_full_dim_value(subtensor[idx])) { // the reason that we don't support FULL_DIM substitution for an arbitrary layout is that // the layout and subtersor can (and usually do) have different ranks full_dim_substituted = true; diff --git 
a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp index 5ea5b135ba595a..0b9f41d47aa0da 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp @@ -42,9 +42,9 @@ ScalarToScalarTPP::ScalarToScalarTPP() { tpp_scalar->set_friendly_name(node->get_friendly_name()); ov::replace_node_update_name(node, tpp_scalar); const auto& out = tpp_scalar->output(0); - snippets::lowered::set_port_desc(out, {1}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(out, {1}); for (const auto& in : out.get_target_inputs()) - snippets::lowered::set_port_desc(in, {1}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(in, {1}); return true; }; diff --git a/src/plugins/intel_cpu/src/weights_cache.cpp b/src/plugins/intel_cpu/src/weights_cache.cpp index eed92f5977cffe..65fd3644ad4215 100644 --- a/src/plugins/intel_cpu/src/weights_cache.cpp +++ b/src/plugins/intel_cpu/src/weights_cache.cpp @@ -10,8 +10,6 @@ namespace ov { namespace intel_cpu { -const SimpleDataHash WeightsSharing::simpleCRC; - WeightsSharing::SharedMemory::SharedMemory( std::unique_lock && lock, const MemoryInfo::Ptr & memory, diff --git a/src/plugins/intel_cpu/src/weights_cache.hpp b/src/plugins/intel_cpu/src/weights_cache.hpp index 70c62569cdeb47..f0401700e49719 100644 --- a/src/plugins/intel_cpu/src/weights_cache.hpp +++ b/src/plugins/intel_cpu/src/weights_cache.hpp @@ -22,31 +22,6 @@ namespace ov { namespace intel_cpu { - -class SimpleDataHash { -public: - SimpleDataHash() { - for (int i = 0; i < kTableSize; i++) { - uint64_t c = i; - for (int j = 0; j < 8; j++) - c = ((c & 1) ? 
0xc96c5795d7870f42 : 0) ^ (c >> 1); - table[i] = c; - } - } - // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182 - uint64_t hash(const unsigned char* data, size_t size) const { - uint64_t crc = 0; - for (size_t idx = 0; idx < size; idx++) - crc = table[(unsigned char)crc ^ data[idx]] ^ (crc >> 8); - - return ~crc; - } - -protected: - static constexpr int kTableSize = 256; - uint64_t table[kTableSize]; -}; - /** * Caching store of Memory objects * Will return a cached object or create new one @@ -94,12 +69,9 @@ class WeightsSharing { SharedMemory::Ptr get(const std::string& key) const; - static const SimpleDataHash& GetHashFunc () { return simpleCRC; } - protected: mutable std::mutex guard; std::unordered_map sharedWeights; - static const SimpleDataHash simpleCRC; }; /** diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp index 3daa819cd4854d..83faa2c06ec6f6 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp @@ -1108,6 +1108,45 @@ INSTANTIATE_TEST_SUITE_P( testParamsDynamicFusingFullUndefShapes, MatMulLayerCPUTest::getTestCaseName); +class FCNotFuseFQCPUTest : public MatMulLayerCPUTest { + void SetUp() override { + MatMulLayerCPUTest::SetUp(); + expectPostOpsToBeFused = false; + } +}; + +TEST_P(FCNotFuseFQCPUTest, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, cpuNodeType); +} + +const std::vector& notFuseSmoke() { + static const std::vector params = { + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, true}}, + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, true}}, + + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, false}}, + {static_shapes_to_test_representation({{59, 
120}, {120, 1}}), {true, true}}, + + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, + }; + return params; +} + +const auto notFuseTestParamsSmoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(notFuseSmoke()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(utils::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn({fusingFakeQuantizePerBatch, fusingFakeQuantizeFullTensor}), + ::testing::ValuesIn({CPUSpecificParams{{}, {}, {""}, "any_type"}})); + +INSTANTIATE_TEST_SUITE_P(smoke_FC, FCNotFuseFQCPUTest, notFuseTestParamsSmoke, FCNotFuseFQCPUTest::getTestCaseName); + } // namespace } // namespace MatMul } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp new file mode 100644 index 00000000000000..107d669f442f80 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/node_builders/constant.hpp" +#include "common_test_utils/node_builders/fake_quantize.hpp" +#include "common_test_utils/node_builders/reshape.hpp" +#include "openvino/openvino.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +enum class FQInterval { U8, I8 }; +inline std::ostream& operator<<(std::ostream& os, FQInterval interval) { + switch (interval) { + case FQInterval::U8: + os << 
"U8"; + break; + case FQInterval::I8: + os << "I8"; + break; + default: + OPENVINO_THROW("Unknown FQInterval"); + } + return os; +} + +typedef std::tuple QuantizedMatMulsWithSharedWeightsParans; + +/* This test verifies the correctness of the hash function computation for the shared weights. + Specifically, it checks that when one op requires compensations computation and second one does not, + the resulting hashes are not identical, and the weights are repacked for each op separately +*/ +class QuantizedMatMulsWithSharedWeightsTest + : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + InputShape shape1; + InputShape shape2; + FQInterval interval1; + FQInterval interval2; + std::tie(shape1, shape2, interval1, interval2) = obj.param; + std::ostringstream result; + result << "IS1=" << shape1 << "IS2=" << shape2 << "FQInterval1=" << interval1 << "FQInterval2=" << interval2; + return result.str(); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + abs_threshold = 1e-4; + + InputShape shape1; + InputShape shape2; + FQInterval interval1; + FQInterval interval2; + std::tie(shape1, shape2, interval1, interval2) = this->GetParam(); + init_input_shapes({shape1, shape2}); + + const auto weights = ov::test::utils::make_constant(ov::element::i8, {16, 16}); + const auto convert = std::make_shared(weights, ov::element::f32); + const auto scale = ov::test::utils::make_constant(ov::element::f32, {16, 1}, ov::test::utils::InputGenerateData(0, 1, 5)); + const auto mul = std::make_shared(convert, scale); + + auto build_fq = [](const ov::Output& parent, FQInterval interval_type) { + const auto low = interval_type == FQInterval::I8 ? std::vector{-12.8f} : std::vector{0.f}; + const auto high = interval_type == FQInterval::I8 ? 
std::vector{12.7f} : std::vector{25.5f}; + return ov::test::utils::make_fake_quantize(parent, ov::element::f32, 256, {1, 1, 1, 1}, low, high, low, high); + }; + + const auto param1 = std::make_shared(ov::element::f32, inputDynamicShapes[0]); + const auto fq1 = build_fq(param1, interval1); + const auto mm1 = std::make_shared(fq1, mul, false, true); + + const auto param2 = std::make_shared(ov::element::f32, inputDynamicShapes[1]); + const auto fq2 = build_fq(param2, interval2); + const auto mm2 = std::make_shared(fq2, mul, false, true); + + function = std::make_shared(ov::OutputVector{mm1, mm2}, ov::ParameterVector{param1, param2}); + } +}; + +TEST_P(QuantizedMatMulsWithSharedWeightsTest, CompareWithRefs) { + run(); +} + +namespace { + +std::vector shapes1{{{-1, -1, -1, 16}, {{1, 1, 15, 16}, {1, 1, 12, 16}, {1, 1, 15, 16}}}}; +std::vector shapes2{{{-1, -1, -1, 16}, {{1, 1, 12, 16}, {1, 1, 15, 16}, {1, 1, 12, 16}}}}; +INSTANTIATE_TEST_SUITE_P(smoke_CustomTest, QuantizedMatMulsWithSharedWeightsTest, + ::testing::Combine( + ::testing::ValuesIn(shapes1), + ::testing::ValuesIn(shapes2), + ::testing::Values(FQInterval::U8, FQInterval::I8), + ::testing::Values(FQInterval::U8, FQInterval::I8)), + QuantizedMatMulsWithSharedWeightsTest::getTestCaseName); +} // namespace +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp index ae5c19559e5a7b..066d81d1f37f36 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // 
SPDX-License-Identifier: Apache-2.0 // @@ -92,6 +92,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace testValues1 @@ -171,6 +172,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace testValues2 diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index 778bcba7a235a0..1089bdc3faffaa 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -66,11 +66,39 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, MatMul, std::vector> input_shapes_dynamic{ + // All dimensions are dynamic { {PartialShape{-1, -1, -1, -1}, {{2, 1, 32, 64}, {2, 2, 10, 20}, {2, 2, 100, 80}, - {2, 2, 10, 20}, {2, 1, 32, 64}}}, + {2, 2, 10, 20}, {2, 1, 32, 64}, {2, 3, 64, 55}}}, {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {2, 2, 20, 30}, {2, 2, 80, 120}, - {2, 2, 20, 30}, {1, 3, 64, 128}}} + {2, 2, 20, 30}, {1, 3, 64, 128}, {2, 3, 55, 128}}} + }, + // Only M dimension is dynamic + one one loop by M + { + {PartialShape{-1, 2, -1, 64}, {{2, 2, 64, 64}, {2, 2, 64, 64}, {2, 2, 35, 64}, + {2, 2, 120, 64}, {2, 2, 15, 64}, {2, 2, 35, 64}}}, + {PartialShape{-1, 2, 64, 32}, {{2, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}, + {1, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}}} + }, + // Only M dimension 
is dynamic + all Loops (by M, N, K) + { + {PartialShape{2, 2, -1, 550}, {{2, 2, 64, 550}, {2, 2, 16, 550}, {2, 2, 35, 550}, + {2, 2, 16, 550}, {2, 2, 70, 550}, {2, 2, 64, 550}}}, + {PartialShape{2, 1, 550, 70}, {{2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}, + {2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}}} + }, + // Only K dimension is dynamic + { + {PartialShape{2, 2, 70, -1}, {{2, 2, 70, 128}, {2, 2, 70, 10}, {2, 2, 70, 33}, + {2, 2, 70, 35}, {2, 2, 70, 100}}}, + {PartialShape{2, 2, -1, 70}, {{2, 2, 128, 70}, {2, 2, 10, 70}, {2, 2, 33, 70}, + {2, 2, 35, 70}, {2, 2, 100, 70}}} + }, + // Only N dimension is dynamic + { + {PartialShape{}, {{2, 2, 65, 550}}}, + {PartialShape{2, 2, 550, -1}, {{2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 70}, + {2, 2, 550, 12}, {2, 2, 550, 10}}} }, }; diff --git a/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp index 39e60bdfe8a235..6f5e559201b30e 100644 --- a/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp +++ b/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp @@ -58,7 +58,11 @@ void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr postOpMgrPtr; std::vector fusedOps; bool checkFusingPosition = true; + bool expectPostOpsToBeFused = true; }; static int getChannelAxis(const ov::AxisSet &axes, bool keep_dims) { @@ -304,6 +305,26 @@ const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared< return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}}; +const auto fusingFakeQuantizePerBatch = fusingSpecificParams{std::make_shared(std::vector{ + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + const auto shape = cfg.input->get_output_partial_shape(0); + ov::Shape perBatchSize(shape.size(), 1); + perBatchSize[0] = shape[0].get_length(); + return 
ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, perBatchSize); + }, "FakeQuantize(PerBatch)"}}), {"FakeQuantize"}}; + +const auto fusingFakeQuantizeFullTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + const auto shape = cfg.input->get_output_partial_shape(0); + ov::Shape fullTensorShape(shape.size(), 1); + for (size_t axis = 0; axis < shape.size(); axis++) { + fullTensorShape[axis] = shape[axis].get_length(); + } + return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, fullTensorShape); + }, "FakeQuantize(FullTensor)"}}), {"FakeQuantize"}}; + const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](postNodeConfig& cfg){ auto localPrc = cfg.input->get_element_type(); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp index ef0ffcd70e6c39..82cbcdfa2c21f3 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp @@ -7,6 +7,7 @@ #include "lir_test_utils.hpp" #include "openvino/opsets/opset10.hpp" #include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_info.hpp" #include "snippets/snippets_isa.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -22,6 +23,7 @@ using namespace ov::snippets; using BRGEMM_TYPE = intel_cpu::brgemm_utils::BRGEMM_TYPE; namespace { + void create_brgemm_loop_infos(const LinearIRPtr& linear_ir, const ExpressionPtr& brgemm_expr, size_t m = 0, size_t m_blk = 0, @@ -31,21 +33,30 @@ void create_brgemm_loop_infos(const LinearIRPtr& linear_ir, const bool n_block = k != 0 && k_blk != 0; const bool m_block = m != 0 && m_blk 
!= 0; if (k_block) { - create_and_add_unified_loop_info(linear_ir, k, k_blk, - {LoopPort(brgemm_expr->get_input_port(0)), LoopPort(brgemm_expr->get_input_port(1), true, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), false)}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(0); + const auto loop_info = + std::make_shared(k, k_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0)), + LoopPort(brgemm_expr->get_input_port(1), true, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), false)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(k, k_block)); loop_info->register_pass_to_handler(0.f); + linear_ir->get_loop_manager()->add_loop_info(loop_info); } if (n_block) { - create_and_add_unified_loop_info(linear_ir, n, n_blk, - {LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(brgemm_expr->get_input_port(1))}, - {LoopPort(brgemm_expr->get_output_port(0))}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(n, n_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), false), + LoopPort(brgemm_expr->get_input_port(1))}, + std::vector{LoopPort(brgemm_expr->get_output_port(0))}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(n, n_block))); } if (m_block) { - create_and_add_unified_loop_info(linear_ir, m, m_blk, - {LoopPort(brgemm_expr->get_input_port(0), true, 1), LoopPort(brgemm_expr->get_input_port(1), false, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(m, m_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), true, 1), + LoopPort(brgemm_expr->get_input_port(1), false, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), true, 1)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(m, m_block))); } } @@ -59,22 +70,31 @@ void create_brgemm_with_copy_b_loop_infos(const LinearIRPtr& linear_ir, const bool n_block = k != 0 && k_blk != 0; 
const bool m_block = m != 0 && m_blk != 0; if (k_block) { - create_and_add_unified_loop_info(linear_ir, k, k_blk, - {LoopPort(brgemm_expr->get_input_port(0)), LoopPort(copy_b_expr->get_input_port(0), true, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), false)}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(0); + const auto loop_info = + std::make_shared(k, k_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0)), + LoopPort(copy_b_expr->get_input_port(0), true, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), false)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(k, k_block)); loop_info->register_pass_to_handler(0.f); + linear_ir->get_loop_manager()->add_loop_info(loop_info); } if (n_block) { - create_and_add_unified_loop_info(linear_ir, n, n_blk, - {LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(copy_b_expr->get_input_port(0))}, - {LoopPort(brgemm_expr->get_output_port(0))}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(n, n_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), false), + LoopPort(copy_b_expr->get_input_port(0))}, + std::vector{LoopPort(brgemm_expr->get_output_port(0))}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(n, n_block))); } if (m_block) { const auto& second_input_port = k_block || n_block ? 
copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1); - create_and_add_unified_loop_info(linear_ir, m, m_blk, - {LoopPort(brgemm_expr->get_input_port(0), true, 1), LoopPort(second_input_port, false, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(m, m_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), true, 1), + LoopPort(second_input_port, false, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), true, 1)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(m, m_block))); } } } // namespace @@ -148,7 +168,8 @@ TEST_F(BrgemmBlockingTest, BlockingIsNotNeeded) { auto brgemm = linear_ir_ref->push_node(data_a.second, data_b.second, BRGEMM_TYPE::STAND_ALONE, 0, 0, 0, layout, layout, layout, m, k, n); brgemm.second->set_beta(0.f); - init_expr_descriptors(*brgemm.first, {{m, k}, {k, n}, {m, n}}); + const auto full_subtensor = VectorDims(2, ov::snippets::utils::get_full_dim_value()); + init_expr_descriptors(*brgemm.first, std::vector(3, full_subtensor)); auto result = linear_ir_ref->push_node(brgemm.second); } } @@ -201,6 +222,7 @@ TEST_F(BrgemmBlockingTest, WithDataRepackingOnlyByM) { const ov::PartialShape input_shape_b{1, 16, 64, 384}; const auto precision_a = ov::element::u8; const auto precision_b = ov::element::i8; + const auto full = ov::snippets::utils::get_full_dim_value(); { auto data_a = linear_ir->push_node(precision_a, input_shape_a); @@ -226,7 +248,7 @@ TEST_F(BrgemmBlockingTest, WithDataRepackingOnlyByM) { auto brgemm = linear_ir_ref->push_node(data_a.second, copy_b.second, BRGEMM_TYPE::REPACKING_ONLY, 0, 0, 0, VectorDims{}, VectorDims{}, VectorDims{}, m_blk, k, n, 0.f); const auto& brgemm_expr = *brgemm.first; - init_expr_descriptors(brgemm_expr, {{m_blk, k}, {k, n}, {m_blk, n}}); + init_expr_descriptors(brgemm_expr, {{m_blk, full}, {full, full}, {m_blk, full}}); create_brgemm_with_copy_b_loop_infos(linear_ir_ref, 
brgemm_expr, copy_b_expr, 384, m_blk); brgemm_expr->set_loop_ids({0}); auto result = linear_ir_ref->push_node(brgemm.second); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp index c618c9e0d86fb5..2abfde0b3bb431 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp @@ -138,8 +138,8 @@ class MHAFP32BufferAllocationTest : public BufferAllocationCPUTest { const size_t k_blk = 16; const size_t n_blk = 64; const auto subtensor_scalar = std::vector{1}; - const auto subtensor_power = std::vector{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM}; - const auto subtensor_full = std::vector(2, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM); + const auto subtensor_power = std::vector{1, ov::snippets::utils::get_full_dim_value()}; + const auto subtensor_full = std::vector(2, ov::snippets::utils::get_full_dim_value()); const auto parameter0 = std::make_shared(ov::element::f32, ov::PartialShape({1, 12, 128, 64})); const auto parameter1 = std::make_shared(ov::element::f32, ov::PartialShape({1, 128, 12, 64})); @@ -196,8 +196,8 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest { const size_t k_blk = 16; const size_t n_blk = 64; const auto subtensor_scalar = std::vector{1}; - const auto subtensor_power = std::vector{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM}; - const auto subtensor_full = std::vector(2, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM); + const auto subtensor_power = std::vector{1, ov::snippets::utils::get_full_dim_value()}; + const auto subtensor_full = std::vector(2, ov::snippets::utils::get_full_dim_value()); const auto parameter0 = 
std::make_shared(ov::element::bf16, ov::PartialShape({1, 12, 128, 64})); const auto parameter1 = std::make_shared(ov::element::bf16, ov::PartialShape({1, 128, 12, 64})); diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index f0f8defe2dff50..f1cf31a2fa0979 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit f0f8defe2dff5058391f2a66e775e20b5de33b08 +Subproject commit f1cf31a2fa097932b8d74e88bf4bd941382504e4 diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp index 0fc6cbdac13132..fa8a8807bbd92c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp @@ -114,6 +114,15 @@ struct kernel_impl_params final { return output_layouts[idx]; } + layout& get_output_layout(size_t idx = 0) { + OPENVINO_ASSERT(output_layouts.size() > idx, + "The size of output layouts must be greater than the requested index: ", + "Requested index is ", idx, ",", + "but the size of output layouts is ", output_layouts.size()); + return output_layouts[idx]; + } + + bool has_fused_primitives() const { return !fused_desc.empty(); } ov::element::Type_t get_output_element_type() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index a454fc7afdee15..52e9f643c299d7 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -288,6 +288,15 @@ struct layout { return *this; } + layout clone_with_other_shape(const ov::PartialShape& new_shape) { + return layout(new_shape, this->data_type, this->format, this->data_padding); + } + + layout clone_with_other_shape(const ov::Shape& new_shape) { + return 
clone_with_other_shape(ov::PartialShape(new_shape)); + } + + friend bool operator==(const layout& lhs, const layout& rhs) { return lhs.data_type == rhs.data_type && lhs.format == rhs.format && lhs.size == rhs.size && lhs.data_padding == rhs.data_padding; } @@ -306,7 +315,7 @@ struct layout { return (lhs.data_padding < rhs.data_padding); } - /// Number of elements to be stored in this memory layout + /// Number of elements to be stored in this layout size_t count() const; /// Layout size with padding included diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index 146a1fa89b400b..09c5f01f216e57 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -250,7 +250,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren "Invalid Batch offset: exceeds data for output!"); } - if (node.can_be_optimized()) { + if (!node.is_dynamic() && node.can_be_optimized()) { update_output_memory(); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index e3471b37c05bd9..17cc9e9f42d38a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -470,7 +470,7 @@ bool crop_in_place_optimization::match(const program_node& node, for (auto user : node.get_users()) { // If the user node's output shape is already static, the padding // w/ dyn pad mask will not be propagated properly at runtime - if (node.is_dynamic() && !user->get_output_layout().is_dynamic()) + if (node.is_dynamic() && !user->get_output_pshape().is_dynamic()) return false; // do not optimize when next node is concatenation which is not output if (user->is_type() && !user->is_output()) @@ -484,10 +484,10 @@ bool crop_in_place_optimization::match(const program_node& node, if 
(node.is_dynamic() && (user->is_type() || user->is_type())) return false; if (user->is_type()) { - // runtime buffer fusing is only handled when there is only one reshape user - if (node.is_dynamic() && node.get_users().size() != 1) - return false; auto& reshape_node = user->as(); + // runtime buffer fusing is only handled when there is only one reshape user and reshape mode is base + if (node.is_dynamic() && (node.get_users().size() != 1 || reshape_node.get_primitive()->mode != reshape::reshape_mode::base)) + return false; if (can_reshape_be_optimized(reshape_node) && (!node.is_dynamic() || !reshape_node.is_runtime_propagatable_padding())) return false; @@ -500,6 +500,14 @@ bool crop_in_place_optimization::match(const program_node& node, if (node.is_constant()) return false; + // do not optimize variadic_split crop when either input1 or input2 is not constant. + // VariadicSplit ngraph shape infer requires value of axis(input1) and split_lengths(input2). + // And non_constant input1/input2 makes risky execution of runtime buffer fusing. 
+ auto& crop_node = node.as(); + if ((crop_node.get_primitive()->op_mode == cldnn::crop_ngraph_op_mode::variadic_split) && + (!crop_node.get_dependency(1).is_constant() || !crop_node.get_dependency(2).is_constant())) + return false; + if (node.get_users().size() > 0) { if (node.get_program().is_body_program() && node.get_dependency(0).is_type()) { return false; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index c6de09403c1cef..88dcb8865d937a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -59,7 +59,20 @@ std::map get_preferred_formats(program& p, layout_o onednn_impls_counter++; } - if (onednn_impls_counter < 1 && lo.get_optimization_attributes().use_onednn_impls) { + // Fallback to ocl when asymmetric weights convolution is existed. + size_t total_convs = 0; + size_t num_asym_wei_convs = 0; + for (auto n : p.get_processing_order()) { + if (n->is_type()) { + total_convs++; + if (n->as().weights_zero_points_term()) + num_asym_wei_convs++; + } + } + + GPU_DEBUG_LOG << "Number of convolutions with weights zero points: " << num_asym_wei_convs << "/" << total_convs << std::endl; + + if (lo.get_optimization_attributes().use_onednn_impls && (onednn_impls_counter < 1 || num_asym_wei_convs > 0)) { should_update_fmt_map = true; lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 0); GPU_DEBUG_LOG << "Disable oneDNN implementations globally" << std::endl; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index e44ee477c0812f..70d0b70c7fa9fa 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ 
b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -31,6 +31,15 @@ void select_preferred_formats::run(program& p) { return; #ifdef ENABLE_ONEDNN_FOR_GPU + + // Fallback to ocl when asymmetric weights convolution is existed. + if (_lo.get_optimization_attributes().use_onednn_impls) { + for (auto n : p.get_processing_order()) { + if (n->is_type() && n->as().weights_zero_points_term()) + return; + } + } + auto forcing_map = _lo.get_implementation_forcing(); engine.create_onednn_engine(p.get_config()); diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index b2acc2abf1c173..bcada1fa769fea 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1937,13 +1937,17 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d prim_input = node.get_dependency_index(node.as().input()); if (node.is_type()) prim_input = node.get_dependency_index(node.as().input()); + size_t prim_weights = node.get_primitive()->input_size(); // Note: did not handle attribute properly. especially for zero-point cldnn::format src_fmt = format::any; - if (idx == prim_input) + if (idx == prim_input) { src_fmt = onednn::find_data_format(prim_desc.src_desc()); - else // Dep for fused post ops + } else if (idx == prim_weights) { + src_fmt = format::custom; + } else { // Dep for fused post ops src_fmt = onednn::find_data_format(prim_desc.dst_desc()); + } // WA: shallow convolution needs to set input format by bfyx. // onednn recommended byxf for input format. It will insert reorder before shallow conv. 
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 522fb03f15c5bd..f8267673722e64 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -465,7 +465,7 @@ void primitive_inst::update_shape() { auto desc = get_node().as().get_primitive(); auto var_mem_size = get_network().get_variable(desc->variable_info.variable_id).get_actual_mem_size(); // Need to trigger realloc_if_needed - if (var_mem_size < _impl_params->get_output_layout(0).get_buffer_size().count()) + if (var_mem_size < _impl_params->get_output_layout(0).get_linear_size()) set_shape_change(); } } @@ -684,13 +684,13 @@ event::ptr primitive_inst::realloc_if_needed() { prealloc_shape[seq_axis] += tmp_prealloc_count; required_buffer_size = std::accumulate(prealloc_shape.begin(), prealloc_shape.end(), size_t(1), std::multiplies()); } else { - required_buffer_size = (updated_layouts[i].get_buffer_size().count()); + required_buffer_size = (updated_layouts[i].get_linear_size()); } if (required_buffer_size * 10 < _max_output_layout_count[i]) { reclaim = true; } if (reclaim) { - GPU_DEBUG_TRACE_DETAIL << id() << ": Updated output[" << i << "] size " << updated_layouts[i].get_buffer_size().count() + GPU_DEBUG_TRACE_DETAIL << id() << ": Updated output[" << i << "] size " << updated_layouts[i].get_linear_size() << " is much smaller than current memory size! 
" << _max_output_layout_count[i] << "Reset memory of output " << i << std::endl; _max_output_layout_count[i] = 0; @@ -705,7 +705,7 @@ event::ptr primitive_inst::realloc_if_needed() { } for (size_t i = 0; i < actual_layouts.size(); ++i) { - bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_buffer_size().count() <= _max_output_layout_count[i]); + bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_linear_size() <= _max_output_layout_count[i]); std::pair prealloc_info; if (_node->is_type() && i == 0) { const auto& desc = _node->as().get_primitive(); @@ -717,17 +717,15 @@ event::ptr primitive_inst::realloc_if_needed() { prealloc_info = sp.predict_preallocation_shape(id(), updated_layouts[i], can_reuse_buffer, i, tmp_prealloc_count); } if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * (dt_sizes_in_B[i]))) { - auto new_layout = updated_layouts[i]; - new_layout.set_partial_shape(prealloc_info.second); - updated_params.output_layouts[i] = new_layout; + updated_params.output_layouts[i] = updated_layouts[i].clone_with_other_shape(prealloc_info.second); } - if (updated_params.output_layouts[i].get_buffer_size().count() < updated_layouts[i].get_buffer_size().count()) { + if (updated_params.output_layouts[i].get_linear_size() < updated_layouts[i].get_linear_size()) { updated_params.output_layouts[i] = updated_layouts[i]; } if (can_reuse_buffer) { GPU_DEBUG_TRACE_DETAIL << id() << ": reuse previously allocated output buffer[" << i << "] - " - << actual_layouts[i].get_buffer_size().count() << "/" << _max_output_layout_count[i] + << actual_layouts[i].get_linear_size() << "/" << _max_output_layout_count[i] << std::endl; if (_node->is_type() && (i == 0)) { // kv_cache has already assigned memory. @@ -759,7 +757,7 @@ event::ptr primitive_inst::realloc_if_needed() { GPU_DEBUG_TRACE_DETAIL << id() << ": realloc output memory. 
" << std::endl; GPU_DEBUG_TRACE_DETAIL << " outputs[" << i << "] " << " Current buffer_size=" << _max_output_layout_count[i] - << " Requested buffer_size=" << updated_layouts[i].get_buffer_size().count() + << " Requested buffer_size=" << updated_layouts[i].get_linear_size() << std::endl; _outputs[i] = allocate_output(_network.get_engine(), _network.get_memory_pool(), @@ -773,7 +771,7 @@ event::ptr primitive_inst::realloc_if_needed() { is_output_buffer(this, true), output_memory_ptr(i).get(), true); - _max_output_layout_count[i] = updated_params.output_layouts[i].get_buffer_size().count(); + _max_output_layout_count[i] = updated_params.output_layouts[i].get_linear_size(); GPU_DEBUG_CODE(std::string memalloc_info = ""); GPU_DEBUG_CODE(memalloc_info += (((_outputs.size() > 1) ? ("o" + to_string(i) + ":") : "") + (_outputs[i]->from_memory_pool ? "from_pool" : "new_alloc"));) @@ -1189,6 +1187,7 @@ void primitive_inst::do_runtime_in_place_kv_cache() { } const auto& desc = _node->as().get_primitive(); auto& past_layout = _impl_params->input_layouts[0]; + auto& new_layout = _impl_params->input_layouts[1]; auto& present_layout = _impl_params->output_layouts[0]; const auto& sequence_axis = desc->concat_axis; const auto& gather_axis = desc->gather_axis; @@ -1207,9 +1206,10 @@ void primitive_inst::do_runtime_in_place_kv_cache() { GPU_DEBUG_TRACE_DETAIL << "[do runtime kv_cache opt] " << id() << " initial present_layout : " << present_layout.to_string() << std::endl; GPU_DEBUG_TRACE_DETAIL << "[do runtime kv_cache opt] " << id() << " initial past_layout : " << past_layout.to_string() << std::endl; auto max_pad = kv_cache_inst::get_max_pad(past_layout, _deps[0].first->_max_output_layout_count[0], sequence_axis_legacy, "past_layout"); - - if (max_pad > 0) { - kv_cache_inst::update_pad(present_layout, max_pad - 1, sequence_axis_legacy); + const auto new_seq_len = static_cast(new_layout.get_shape()[sequence_axis]); + // In chatbot scenario, when chat history must be stored in 
kvcache, new_seq_len may not be 1 even if max_pad is greater than 0 + if (max_pad - new_seq_len >= 0) { + kv_cache_inst::update_pad(present_layout, max_pad - new_seq_len, sequence_axis_legacy); GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() << " Updated present_layout's pad : " << present_layout.to_string() << std::endl; auto& variable = get_network().get_variable(desc->variable_info.variable_id); variable.set_layout(present_layout); @@ -1850,7 +1850,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool _impl_params->strm = _network.get_stream_ptr(); for (size_t i = 0; i < get_node().get_output_layouts().size(); ++i) { if (_outputs.size() > i) { - _max_output_layout_count.push_back(_outputs[i] ? _outputs[i]->get_layout().get_buffer_size().count() : 0); + _max_output_layout_count.push_back(_outputs[i] ? _outputs[i]->get_layout().get_linear_size() : 0); } else { _outputs.push_back(nullptr); _max_output_layout_count.push_back(0); @@ -1983,9 +1983,9 @@ event::ptr primitive_inst::update_weights() { GPU_DEBUG_TRACE_DETAIL << id() << ": add original weights memory " << original_layout.to_short_string() << " to weights cache; " << "cache_size=" << _reordered_weights_cache.size() << "/" << _reordered_weights_cache.capacity() << std::endl; } else { - auto expected_layout = reorder_kernel_params->get_output_layout(); // Set original partial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion - expected_layout.set_partial_shape(original_layout.get_partial_shape()); + auto expected_layout = + reorder_kernel_params->get_output_layout().clone_with_other_shape(original_layout.get_partial_shape()); _impl_params->weights_layout = optional_layout(expected_layout); if (_reordered_weights_cache.has(expected_layout)) { diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index eed87ed759211d..5cbef11dd3b045 100644 --- 
a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -109,7 +109,8 @@ layout reshape_inst::calc_output_layout(reshape_node const& node, kernel_impl_pa auto desc = impl_param.typed_desc(); if (desc->output_shape.count() == 0) { if (desc->output_partial_shape.size() != 0) { - return layout{desc->output_partial_shape, input_layout.data_type, input_layout.format}; + format out_fmt = format::adjust_to_rank(input_layout.format, desc->output_partial_shape.rank().get_length()); + return layout{desc->output_partial_shape, input_layout.data_type, out_fmt}; } else { OPENVINO_ASSERT("[GPU] Output shape is not provided"); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp index 890e086f28a6bd..271e8d6a620890 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp @@ -140,8 +140,10 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa if (conf.empty()) return jit; - if (params.fused_ops.size() == 1 && params.fused_ops[0].GetType() == KernelType::REORDER) + if (std::all_of(params.fused_ops.cbegin(), params.fused_ops.cend(), + [](fused_operation_desc desc) { return desc.GetType() == KernelType::REORDER; })) { return jit; + } try { for (auto& c : conf) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 07d81dce5e3f23..1613afec063eb1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -310,7 +310,7 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, if 
(!params.is_shape_agnostic && batch == 1) { // Tuning for Meteor Lake size_t min_num_threads = params.engineInfo.computeUnitsCount * simd; - if (output_f / 2 < min_num_threads && params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { + if (output_f / 2 <= min_num_threads && params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { GPU_DEBUG_TRACE_DETAIL << "FC bf tiled: Set ofm_tile 1. (output_f : " << output_f << ", computeUnitsCount : " << params.engineInfo.computeUnitsCount << " min_num_threads : " << min_num_threads << ")" << std::endl; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp index afda5292e69c60..85f8d79e7ace31 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -94,6 +94,7 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace testValues1 @@ -174,6 +175,7 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), 
RecurrentCellTransformation::getTestCaseName); } // namespace testValues2 diff --git a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp index 883279ed369dd9..d4c50ec84ac78a 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp @@ -672,3 +672,30 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, eltwise_quantize_fs_b_yx_fsv32_exception, eltwise_test_params{ CASE_ELTWISE_FP16_BATCH_FS_B, 6, 6 }, eltwise_test_params{ CASE_ELTWISE_FP16_BATCH_B_FS, 6, 6 }, })); + +class eltwise_fusing_reorders : public EltwiseFusingTest { +public: + layout get_input_layout3(eltwise_test_params& p) { + return layout{ {1, 1, 1, p.input_size[3]}, p.input_type, p.input_format }; + } +}; +TEST_P(eltwise_fusing_reorders, reorders_for_data_type) { + auto p = GetParam(); + create_topologies( + input_layout("input", get_input_layout(p)), + data("data", get_mem(get_input_layout3(p))), + eltwise("eltwise", { input_info("input"), input_info("data") }, p.mode, p.default_type), + reorder("reorder1", input_info("eltwise"), format::bfyx, data_types::i32, {}, reorder_mean_mode::subtract, padding(), true), + reorder("reorder2", input_info("reorder1"), format::bfyx, data_types::f16, {}, reorder_mean_mode::subtract, padding(), true), + data("data2", get_mem(get_input_layout3(p))), + eltwise("eltwise_min", { input_info("reorder2"), input_info("data2") }, eltwise_mode::min, p.default_type), + reorder("out", input_info("eltwise_min"), p.default_format, data_types::f32) + ); + + tolerance = default_tolerance(p.input_type); + execute(p, true); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, eltwise_fusing_reorders, ::testing::ValuesIn(std::vector{ + eltwise_test_params{ { 1, 16, 16, 2 }, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::max, 4, 6 }, +})); diff --git 
a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp index e4a077594c7a7e..e5506388eba273 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp @@ -777,6 +777,91 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic) { ASSERT_EQ(output_ptr_3[i], out3[i]); } +TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) { + auto& engine = get_test_engine(); + + auto in_layout = layout{ ov::PartialShape{-1, -1, -1}, data_types::f32, format::bfyx}; + auto in2_layout = layout{ ov::PartialShape{-1, -1}, data_types::f32, format::bfyx}; + auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); + auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); + auto zp_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); + auto axis_mem = engine.allocate_memory({ {}, data_types::i64, format::bfyx }); + auto shapeof_mem = engine.allocate_memory({ {2, 6}, data_types::f32, format::bfyx }); + + int64_t axis = 2; + set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, + 0.5f, -2.0f, -0.5f, -1.0f }); + set_values(axis_mem, {axis}); + set_values(shapeof_mem, { 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f}); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); + set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f }); + set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f }); + set_values(zp_mem, { 1.0f, 2.0f, 2.0f, 1.0f, 4.0f, 1.0f, 6.0f, 2.0f }); + + std::vector out1 = { 
13.f, 58.f, -11.f, -62.f }; + std::vector out2 = { -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f }; + std::vector out3 = { 13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f }; + + cldnn::crop_ngraph_op_mode op_mode = cldnn::crop_ngraph_op_mode::variadic_split; + topology topology( + input_layout("input", in_layout), + input_layout("input_shapeof", in2_layout), + data("axis", axis_mem), + data("weights", weights_mem), + data("bias", bias_mem), + data("scale", scale_mem), + data("zp", zp_mem), + fully_connected("fc", input_info("input"), "weights", "bias", "scale", "zp", data_types::f32, 3, 2), + shape_of("shapeof", input_info("input_shapeof"), cldnn::data_types::i64), + crop("crop1", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 0, axis), + reorder("output1", input_info("crop1"), format::bfyx, data_types::f32), + crop("crop2", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 1, axis), + reshape("reshape", input_info("crop2"), true, std::vector{0, 0, 3, 2}, ov::PartialShape{-1, -1, 3, 2}, cldnn::reshape::reshape_mode::base), + reorder("output2", input_info("reshape"), format::bfyx, data_types::f32, std::vector(), reorder_mean_mode::subtract, padding(), true), + reorder("output3", input_info("fc"), format::bfyx, data_types::f32) + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + + network.set_input_data("input", input_mem); + network.set_input_data("input_shapeof", shapeof_mem); + + std::map outputs; + EXPECT_NO_THROW(outputs = network.execute()); + + auto output = outputs.at("output1").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < 
out1.size(); i++) + ASSERT_EQ(output_ptr[i], out1[i]); + + auto output_2 = outputs.at("output2").get_memory(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); + + for (size_t i = 0; i < out2.size(); i++) + ASSERT_EQ(output_ptr_2[i], out2[i]); + + auto output_3 = outputs.at("output3").get_memory(); + cldnn::mem_lock output_ptr_3(output_3, get_test_stream()); + + for (size_t i = 0; i < out3.size(); i++) + ASSERT_EQ(output_ptr_3[i], out3[i]); +} + // Testing for implicit crop along batch axis and outer padding optimzing. // Outer padding opt includes opt out of reshape and reorder which has padded input only in batch axis // This optimzing also includes offset(outer axis padded input) handling of oneDNN primitive. diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 0bf595e124db89..132b2378420a03 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -9933,6 +9934,59 @@ TEST(convolution_gpu_onednn, has_proper_synchronization) { } } +// A test that detects crashes in OneDNN convolution selection checks +TEST(convolution_gpu_onednn, grouped_runtime_weights) { + auto& engine = get_test_engine(); + + if (!engine.get_device_info().supports_immad) + return; + + tests::random_generator rg(GET_SUITE_NAME); + + int64_t input_b = 1, input_f = 256, input_y = 29, input_x = 29; + auto input_size = ov::PartialShape{ input_b, input_f, input_y, input_x }; + auto input_data = rg.generate_random_4d(input_b, input_f, input_y, input_x, -1, 1); + auto input_data_byxf = flatten_4d(format::byxf, input_data); + auto input_mem = engine.allocate_memory({ input_size, data_types::f16, format::byxf }); + set_values(input_mem, input_data_byxf); + + int64_t weights_b = 1, weights_f = 256, weights_y = 5, 
weights_x = 5; + auto weights_size = ov::PartialShape{ weights_b, weights_f, weights_y, weights_x }; + auto weights_data = rg.generate_random_4d(weights_b, weights_f, weights_y, weights_x, -1, 1); + auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); + auto weights_mem = engine.allocate_memory({ weights_size, data_types::f16, format::bfyx }); + set_values(weights_mem, weights_data_bfyx); + + auto input = input_layout("input", input_mem->get_layout()); + auto weights = input_layout("weights", weights_mem->get_layout()); + auto weights_reshape = reshape("reshaped_weights", input_info("weights"), true, { 256, 1, 1, 5, 5 }, { 256, 1, 1, 5, 5 }); + auto conv = convolution("conv", input_info("input"), "reshaped_weights", no_bias, 256, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, true); + auto output_reorder = reorder("reorder", input_info("conv"), { data_types::f32, format::bfyx, { 1, 256, 25, 25 } }); + + topology topology(input, weights, weights_reshape, conv, output_reorder); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + + network.set_input_data("input", input_mem); + network.set_input_data("weights", weights_mem); + + auto output = network.execute(); + + ASSERT_EQ(output.size(), size_t(1)); + ASSERT_EQ(output.begin()->first, "reorder"); + + auto output_memory = output.at("reorder").get_memory(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + + ASSERT_EQ(output_layout.get_shape(), ov::Shape({1, 256, 25, 25})); +} + #endif // ENABLE_ONEDNN_FOR_GPU template diff --git a/src/plugins/intel_npu/CMakeLists.txt b/src/plugins/intel_npu/CMakeLists.txt index 
ce06b2542ac31c..6267fcfb288d38 100644 --- a/src/plugins/intel_npu/CMakeLists.txt +++ b/src/plugins/intel_npu/CMakeLists.txt @@ -30,8 +30,8 @@ add_subdirectory(src) if(ENABLE_TESTS) add_subdirectory(tests) + add_subdirectory(tools) endif() -add_subdirectory(tools) ov_cpack_add_component(${NPU_INTERNAL_COMPONENT} HIDDEN) diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 12b94767b14154..52e3298907deb6 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -274,12 +274,14 @@ void ZeroInferRequest::create_pipeline() { void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor, const size_t index, const bool isInput) { + OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data"); auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; auto& tensorsData = isInput ? _inputTensorsData : _outputTensorsData; bool setTensorData = false; bool levelZeroTensorCreatedLocally = true; + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation"); ze_memory_allocation_properties_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; auto res = zeMemGetAllocProperties(_initStructs->getContext(), tensor->data(), &desc, nullptr); @@ -306,6 +308,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor // random tensor if (tensorsData.at(index).has_value() && !tensorsData.at(index)->levelZeroTensorCreatedLocally) { _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor"); + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor"); levelZeroTensors.at(index) = allocate_tensor(isInput ? 
_metadata.inputs.at(index) : _metadata.outputs.at(index), @@ -327,6 +330,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); _pipeline->updateCommandList(*tensorsData.at(index), isInput ? _executor->get_input_descriptors().at(index).idx : _executor->get_output_descriptors().at(index).idx); @@ -337,6 +341,8 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr tensor, const size_t index, const bool isInput) { + OV_ITT_TASK_CHAIN(ZERO_SET_REMOTE_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_remote_tensor_data"); + auto l0_context = reinterpret_cast( extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); if (_initStructs->getContext() != l0_context) { @@ -357,6 +363,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrupdateCommandList(*tensorsData.at(index), isInput ? 
_executor->get_input_descriptors().at(index).idx : _executor->get_output_descriptors().at(index).idx); @@ -364,6 +371,8 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr& port, const ov::SoPtr& tensor) { + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "set_tensor"); + auto foundPort = find_port(port); OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); try { @@ -392,6 +401,8 @@ void ZeroInferRequest::set_tensor(const ov::Output& port, const } ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output& port) const { + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_tensor"); + auto foundPort = find_port(port); OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); @@ -428,10 +439,11 @@ void ZeroInferRequest::infer() { void ZeroInferRequest::infer_async() { _logger.debug("InferRequest::infer_async started"); - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "infer_async"); + OV_ITT_TASK_CHAIN(ZERO_INFER, itt::domains::LevelZeroBackend, "infer_async", "start"); _executor->mutexLock(); if (!_pipelineIsCreated) { + OV_ITT_TASK_NEXT(ZERO_INFER, "create_pipeline"); create_pipeline(); _pipelineIsCreated = true; @@ -469,6 +481,7 @@ void ZeroInferRequest::infer_async() { } _logger.info("Tensor is not allocated in the current Level Zero context"); + OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy"); std::memcpy(levelZeroBuffer, userBuffer, userTensor->get_byte_size()); } } @@ -476,6 +489,7 @@ void ZeroInferRequest::infer_async() { ++inputIndex; } + OV_ITT_TASK_NEXT(ZERO_INFER, "push"); _pipeline->push(); } @@ -518,6 +532,7 @@ void ZeroInferRequest::get_result() { } _logger.info("Tensor is not allocated in the current Level Zero context"); + OV_ITT_TASK_NEXT(ZERO_RESULT, "memcpy"); std::memcpy(userBuffer, levelZeroBuffer, userTensor->get_byte_size()); } } @@ -525,6 +540,7 @@ void ZeroInferRequest::get_result() { ++outputIndex; } + OV_ITT_TASK_NEXT(ZERO_RESULT, "reset"); _pipeline->reset(); 
_logger.debug("InferRequest::get_result finished"); } diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index a1d356a915b619..2e5712babbdc29 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -297,6 +297,10 @@ struct IntegratedPipeline final : public Pipeline { }; void updateCommandList(const TensorData& tensorsData, const uint32_t index) override { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, + itt::domains::LevelZeroBackend, + "IntegratedPipeline", + "updateCommandList"); const size_t numberOfCommandLists = _command_lists.size(); for (size_t i = 0; i < numberOfCommandLists; i++) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 40e4b257832b77..56387d620a8bca 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -211,15 +211,6 @@ Plugin::Plugin() // parse again env_variables after backend is initialized to get backend proprieties _globalConfig.parseEnvVars(); - // initialize properties which have device-tied default values in global config - // *only if there is a driver available - if (_metrics->GetAvailableDevicesNames().size() > 0) { - _globalConfig.update({{ov::intel_npu::stepping.name(), - std::to_string(_metrics->GetSteppingNumber(get_specified_device_name(_globalConfig)))}}); - _globalConfig.update({{ov::intel_npu::max_tiles.name(), - std::to_string(_metrics->GetMaxTiles(get_specified_device_name(_globalConfig)))}}); - } - // Map from name to function {Config -> ov::Any} // Note that some properties are RW before network is loaded, and become RO after network is loaded _properties = { @@ -462,14 +453,24 @@ Plugin::Plugin() {ov::intel_npu::stepping.name(), {false, ov::PropertyMutability::RW, - [](const Config& config) { - return config.get(); + [&](const Config& config) { 
+ if (!config.has()) { + const auto specifiedDeviceName = get_specified_device_name(config); + return static_cast(_metrics->GetSteppingNumber(specifiedDeviceName)); + } else { + return config.get(); + } }}}, {ov::intel_npu::max_tiles.name(), {false, ov::PropertyMutability::RW, - [](const Config& config) { - return config.get(); + [&](const Config& config) { + if (!config.has()) { + const auto specifiedDeviceName = get_specified_device_name(config); + return static_cast(_metrics->GetMaxTiles(specifiedDeviceName)); + } else { + return config.get(); + } }}}, {ov::intel_npu::compilation_mode.name(), {false, @@ -622,7 +623,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // Update stepping w/ information from driver, unless provided by user or we are off-device // Ignore, if compilation was requested for platform, different from current if (!localConfig.has() && device != nullptr && - device->getName() == ov::intel_npu::Platform::standardize(platform)) { + device->getName() == ov::intel_npu::Platform::standardize(platform) && + _metrics->GetBackendName() == "level_zero") { try { localConfig.update({{ov::intel_npu::stepping.name(), std::to_string(device->getSubDevId())}}); } catch (...) { @@ -633,7 +635,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // Update max_tiles w/ information from driver, unless provided by user or we are off-device // Ignore, if compilation was requested for platform, different from current if (!localConfig.has() && device != nullptr && - device->getName() == ov::intel_npu::Platform::standardize(platform)) { + device->getName() == ov::intel_npu::Platform::standardize(platform) && + _metrics->GetBackendName() == "level_zero") { try { localConfig.update({{ov::intel_npu::max_tiles.name(), std::to_string(device->getMaxNumSlices())}}); } catch (...) 
{ diff --git a/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp b/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp index c6ca8f50fd3f94..87b2301a7ae4fb 100644 --- a/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp +++ b/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp @@ -5,8 +5,11 @@ #pragma once +#include #include +#include + namespace npu { namespace utils { @@ -58,5 +61,12 @@ inline ov::Tensor toFP32(const ov::Tensor& in, void* ptr = nullptr) { */ std::vector> parseTensorsAsFP32(const std::map& tensors); +/** + * @brief Join several non-batched tensors having the same shapes and precisions into a batched one. + * + * @param tensors The source non-batched tensors + * @return The merged batched tensor + */ +ov::Tensor joinTensors(const std::list& tensors, const ov::Layout& layout); } // namespace utils } // namespace npu diff --git a/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp b/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp index 470d737a2b9d31..32616b86135243 100644 --- a/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp +++ b/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp @@ -468,5 +468,29 @@ std::vector> parseTensorsAsFP32(const std::map& tensors, const ov::Layout& layout) { + if (tensors.empty()) { + OPENVINO_THROW("Cannot join tensors: nothing to join"); + } + if (!ov::layout::has_batch(layout)) { + OPENVINO_THROW("Cannot join tensors: has no batch_idx in layout", layout.to_string()); + } + auto pivotShape = tensors.front().get_shape(); + auto pivotPrecision = tensors.front().get_element_type(); + if (!std::all_of(tensors.begin(), tensors.end(), [&pivotShape, &pivotPrecision](const auto& t) { + return t.get_shape() == pivotShape && t.get_element_type() == pivotPrecision; + })) { + OPENVINO_THROW("Cannot join tensors with different shapes, expected: ", pivotPrecision, ", ", pivotShape); + } + pivotShape[ov::layout::batch_idx(layout)] *= tensors.size(); + ov::Tensor 
out(pivotPrecision, pivotShape); + const auto outputBuffer = out.data(); + size_t bytesOffset = 0; + for (const auto& t : tensors) { + memcpy(reinterpret_cast(outputBuffer) + bytesOffset, t.data(), t.get_byte_size()); + bytesOffset += t.get_byte_size(); + } + return out; +} } // namespace utils } // namespace npu diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp index 2d14dbd23e0d7e..e29b5025158373 100644 --- a/src/plugins/intel_npu/tools/single-image-test/main.cpp +++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -66,10 +68,18 @@ DEFINE_string(device, "", "Device to use"); DEFINE_string(config, "", "Path to the configuration file (optional)"); DEFINE_string(ip, "", "Input precision (default: U8, available: FP32, FP16, I32, I64, U8)"); DEFINE_string(op, "", "Output precision (default: FP32, available: FP32, FP16, I32, I64, U8)"); -DEFINE_string(il, "", "Input layout"); -DEFINE_string(ol, "", "Output layout"); -DEFINE_string(iml, "", "Model input layout"); -DEFINE_string(oml, "", "Model output layout"); +DEFINE_string( + il, "", + "Input layout for all inputs, or ';' separated list of pairs :. Regex in is supported"); +DEFINE_string(ol, "", + "Output layout for all outputs, or ';' separated list of pairs :. Regex in is " + "supported"); +DEFINE_string(iml, "", + "Model input layout for all model inputs, or ';' separated list of pairs :. Regex in " + " is supported"); +DEFINE_string(oml, "", + "Model output layout for all outputs, or ';' separated list of pairs :. 
Regex in " + " is supported"); DEFINE_bool(img_as_bin, false, "Force binary input even if network expects an image"); DEFINE_bool(pc, false, "Report performance counters"); @@ -156,6 +166,25 @@ std::vector splitStringList(const std::string& str, char delim) { return out; } +std::map parseArgMap(std::string argMap) { + argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end()); + + const auto pairs = splitStringList(argMap, ';'); + + std::map parsedMap; + for (auto&& pair : pairs) { + const auto lastDelimPos = pair.find_last_of(':'); + auto key = pair.substr(0, lastDelimPos); + std::string value; + if (lastDelimPos != std::string::npos) { + value = pair.substr(lastDelimPos + 1); + } + parsedMap[std::move(key)] = std::move(value); + } + + return parsedMap; +} + void parseCommandLine(int argc, char* argv[]) { std::ostringstream usage; usage << "Usage: " << argv[0] << "[]"; @@ -531,6 +560,38 @@ std::vector> parseMeanOrScale(const std::string& mean_scale, return result; } +using RegexPtr = std::unique_ptr; +std::map parseLayoutRegex(std::string layouts) { + std::map input_output_layouts = parseArgMap(std::move(layouts)); + + std::map out; + for (const auto& input_output_layout : input_output_layouts) { + auto [name, value] = input_output_layout; + if (value.empty()) { + if (name.empty()) { + throw std::runtime_error("Can't parse layouts string \"" + layouts + + "\" into valid \"input:layout;input:layout\" pairs"); + } + // there is no value only name, thus we consider input/output name as "any" and + // apply layout value as the parsed name + out.emplace(std::make_unique(".*"), name); + continue; + } + std::string valid_regex_str = name.empty() ? 
".*" : "^" + name + "$"; + out.emplace(std::make_unique(std::move(valid_regex_str)), std::move(value)); + } + return out; +} + +template +std::optional getRegexSubstitutionIfExist(const std::string& haystack, const std::map& substitutions) { + for (const auto& s : substitutions) { + if (std::regex_search(haystack, *s.first)) { + return {s.second}; + } + } + return {}; +} // // File utils // @@ -569,27 +630,70 @@ ov::Tensor loadImage(const ov::element::Type& precision, const ov::Shape& shape, return tensor; } -ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const std::string& filePath, - const ov::element::Type& dataPrecision) { +ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout, + const std::string& filePath, const ov::element::Type& dataPrecision) { std::ifstream binaryFile(filePath, std::ios_base::binary | std::ios_base::ate); OPENVINO_ASSERT(binaryFile, "Failed to open input binary file: ", filePath); - const auto fileBytes = binaryFile.tellg(); + const auto fileSize = binaryFile.tellg(); binaryFile.seekg(0, std::ios_base::beg); OPENVINO_ASSERT(binaryFile.good(), "While reading a file an error is encountered"); - - const ov::Tensor requestedTensor(modelPrecision, shape); - const int reqTensorBytes = static_cast(requestedTensor.get_byte_size()); + const size_t fileBytes = static_cast(fileSize); + ov::Tensor requestedTensor(modelPrecision, shape); + const size_t reqTensorBytes = static_cast(requestedTensor.get_byte_size()); if (dataPrecision != modelPrecision && dataPrecision != ov::element::Type_t::undefined) { std::cout << "Converting " << filePath << " input from " << dataPrecision << " to " << modelPrecision << std::endl; const ov::Tensor inputTensor(dataPrecision, shape); - binaryFile.read(reinterpret_cast(inputTensor.data()), static_cast(fileBytes)); - npu::utils::convertTensorPrecision(inputTensor, requestedTensor); + if (fileBytes == 
inputTensor.get_byte_size()) { + binaryFile.read(reinterpret_cast(inputTensor.data()), static_cast(fileBytes)); + npu::utils::convertTensorPrecision(inputTensor, requestedTensor); + } else { + std::cout << "File contains " << fileBytes + << " bytes, but it expected to be: " << inputTensor.get_byte_size() + << " while converting precision from " << dataPrecision << " to " << modelPrecision + << ". Check whether it is possible to batch loading " << std::endl; + OPENVINO_ASSERT(ov::layout::has_batch(layout), + "Input layout has no batch dimenstion: ", layout.to_string()); + size_t N = shape[ov::layout::batch_idx(layout)]; + OPENVINO_ASSERT(fileBytes * N == inputTensor.get_byte_size(), "File contains ", fileBytes, " bytes, but ", + inputTensor.get_byte_size() * N, " total in batch size ", N, + " expected while converting precision from ", dataPrecision, " to ", modelPrecision); + ov::Shape debatchedInputTensorShape(shape); + debatchedInputTensorShape[ov::layout::batch_idx(layout)] = 1; + const ov::Tensor inputDebatchedTensor(dataPrecision, debatchedInputTensorShape); + binaryFile.read(reinterpret_cast(inputDebatchedTensor.data()), + static_cast(fileBytes)); + const ov::Tensor convertedPrecisionTensor(modelPrecision, debatchedInputTensorShape); + npu::utils::convertTensorPrecision(inputDebatchedTensor, convertedPrecisionTensor); + std::list tensorsToJoin; + std::generate_n(std::back_inserter(tensorsToJoin), N, [&convertedPrecisionTensor]() { + return convertedPrecisionTensor; + }); + requestedTensor = npu::utils::joinTensors(tensorsToJoin, layout); + } + } else { - OPENVINO_ASSERT(fileBytes == reqTensorBytes, "File contains ", fileBytes, " bytes, but ", reqTensorBytes, - " expected"); - binaryFile.read(reinterpret_cast(requestedTensor.data()), static_cast(reqTensorBytes)); + if (fileBytes == reqTensorBytes) { + binaryFile.read(reinterpret_cast(requestedTensor.data()), + static_cast(reqTensorBytes)); + } else { + std::cout << "File contains " << fileBytes << " bytes, but 
it expected to be: " << reqTensorBytes + << " when datatypes match. " + << ". Check whether it is possible to batch loading " << std::endl; + OPENVINO_ASSERT(ov::layout::has_batch(layout), + "Input layout has no batch dimenstion: ", layout.to_string()); + size_t N = shape[ov::layout::batch_idx(layout)]; + OPENVINO_ASSERT(fileBytes * N == reqTensorBytes, "File contains ", fileBytes, " bytes, but ", + reqTensorBytes, " in batch size ", N, " expected"); + + // duplicate a binary into tensor memory if the tensor batched + for (size_t n = 0; n < N; ++n) { + binaryFile.seekg(0, std::ios_base::beg); + binaryFile.read(reinterpret_cast(requestedTensor.data()) + fileBytes * n, + static_cast(fileBytes)); + } + } } return requestedTensor; @@ -617,7 +721,7 @@ ov::Tensor loadInput(const ov::element::Type& modelPrecision, const ov::Shape& s if (isImage(shape, layout) && !FLAGS_img_as_bin) { return loadImage(modelPrecision, shape, layout, filePath, colorFormat); } else { - return loadBinary(modelPrecision, shape, filePath, dataPrecision); + return loadBinary(modelPrecision, shape, layout, filePath, dataPrecision); } } @@ -1620,10 +1724,10 @@ static int runSingleImageTest() { throw std::logic_error("Parameter -op " + FLAGS_op + " is not supported"); } - ov::Layout inUserLayout(FLAGS_il); - ov::Layout outUserLayout(FLAGS_ol); - ov::Layout inModelLayout(FLAGS_iml); - ov::Layout outModelLayout(FLAGS_oml); + std::map inUserLayouts = parseLayoutRegex(FLAGS_il); + std::map outUserLayouts = parseLayoutRegex(FLAGS_ol); + std::map inModelLayouts = parseLayoutRegex(FLAGS_iml); + std::map outModelLayouts = parseLayoutRegex(FLAGS_oml); std::vector inputFilesPerCase; std::vector> inputFilesForOneInfer; @@ -1712,10 +1816,16 @@ static int runSingleImageTest() { } // Input layout - if (!inUserLayout.empty()) { - for (size_t i = 0; i < inputInfo.size(); ++i) { + for (size_t i = 0; i < inputInfo.size(); ++i) { + if (std::optional inUserLayout = + 
getRegexSubstitutionIfExist(inputInfo[i].get_any_name(), inUserLayouts); + inUserLayout.has_value()) { ov::Layout inLayerModelLayout; - if (inModelLayout.empty()) { + if (std::optional inModelLayout = + getRegexSubstitutionIfExist(inputInfo[i].get_any_name(), inModelLayouts); + inModelLayout.has_value()) { + inLayerModelLayout = inModelLayout.value(); + } else { const auto shape = inputInfo[i].get_shape(); inLayerModelLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Configuring preprocessing. Since --iml option isn't set, input model " @@ -1723,11 +1833,12 @@ static int runSingleImageTest() { << inputInfo[i].get_any_name() << "\" is infered from shape: " << toString(shape) << " rank (" << shape.size() << ") as " << inLayerModelLayout.to_string() << std::endl; - } else { - inLayerModelLayout = inModelLayout; } + std::cout << "Set layouts for the input: \"" << inputInfo[i].get_any_name() << "\", model " + << inLayerModelLayout.to_string() << ", user " << inUserLayout.value().to_string() + << std::endl; ppp.input(i).model().set_layout(inLayerModelLayout); - ppp.input(i).tensor().set_layout(inUserLayout); + ppp.input(i).tensor().set_layout(inUserLayout.value()); } } @@ -1766,10 +1877,16 @@ static int runSingleImageTest() { } // Output layout - if (!outUserLayout.empty()) { - for (size_t i = 0; i < outputInfo.size(); ++i) { + for (size_t i = 0; i < outputInfo.size(); ++i) { + if (std::optional outUserLayout = + getRegexSubstitutionIfExist(outputInfo[i].get_any_name(), outUserLayouts); + outUserLayout.has_value()) { ov::Layout outLayerModelLayout; - if (outModelLayout.empty()) { + if (std::optional outModelLayout = + getRegexSubstitutionIfExist(outputInfo[i].get_any_name(), outModelLayouts); + outModelLayout.has_value()) { + outLayerModelLayout = outModelLayout.value(); + } else { const auto shape = outputInfo[i].get_shape(); outLayerModelLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Configuring preprocessing. 
Since --oml option isn't set, output model " @@ -1777,11 +1894,12 @@ static int runSingleImageTest() { << outputInfo[i].get_any_name() << "\" is infered from shape: " << toString(shape) << " rank (" << shape.size() << ") as " << outLayerModelLayout.to_string() << std::endl; - } else { - outLayerModelLayout = outModelLayout; } + std::cout << "Set layouts for the output: \"" << outputInfo[i].get_any_name() << "\", model " + << outLayerModelLayout.to_string() << ", user " << outUserLayout.value().to_string() + << std::endl; ppp.output(i).model().set_layout(outLayerModelLayout); - ppp.output(i).tensor().set_layout(outUserLayout); + ppp.output(i).tensor().set_layout(outUserLayout.value()); } } @@ -1852,10 +1970,14 @@ static int runSingleImageTest() { // Determine the input layout ov::Layout inputLayout; - if (!inUserLayout.empty()) { - inputLayout = inUserLayout; - } else if (!inModelLayout.empty()) { - inputLayout = inModelLayout; + if (std::optional inUserLayout = + getRegexSubstitutionIfExist(inputInfo.get_any_name(), inUserLayouts); + inUserLayout.has_value()) { + inputLayout = inUserLayout.value(); + } else if (std::optional inModelLayout = + getRegexSubstitutionIfExist(inputInfo.get_any_name(), inModelLayouts); + inModelLayout.has_value()) { + inputLayout = inModelLayout.value(); } else { inputLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Loading input data. 
Since --iml option isn't set, input model layout for " @@ -1905,7 +2027,9 @@ static int runSingleImageTest() { LayoutMap outputLayouts; // Several metrics may require this // Load the reference data - for (const auto& [tensorName, tensor] : outputTensors) { + for (const auto& out : compiledModel.outputs()) { + const auto& tensorName = out.get_any_name(); + const auto& tensor = outputTensors.at(tensorName); const ov::element::Type& precision = tensor.get_element_type(); const ov::Shape& shape = tensor.get_shape(); @@ -1922,10 +2046,14 @@ static int runSingleImageTest() { // Determine the output layout ov::Layout outputLayout; - if (!outUserLayout.empty()) { - outputLayout = outUserLayout; - } else if (!outModelLayout.empty()) { - outputLayout = outModelLayout; + if (std::optional outUserLayout = + getRegexSubstitutionIfExist(tensorName, outUserLayouts); + outUserLayout.has_value()) { + outputLayout = outUserLayout.value(); + } else if (std::optional outModelLayout = + getRegexSubstitutionIfExist(tensorName, outModelLayouts); + outModelLayout.has_value()) { + outputLayout = outModelLayout.value(); } else { outputLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Since --oml option isn't set, output model layout for layer \"" @@ -1941,7 +2069,8 @@ static int runSingleImageTest() { outputInd = 0; // Dump the outputs obtained upon prediction - for (const auto& tensorEntry : outputTensors) { + for (const auto& out : compiledModel.outputs()) { + const auto& tensor = outputTensors.at(out.get_any_name()); std::ostringstream ostr; ostr << netFileName << "_kmb_out_" << outputInd << "_case_" << numberOfTestCase << ".blob"; const auto blobFileName = ostr.str(); @@ -1949,7 +2078,7 @@ static int runSingleImageTest() { std::cout << "Dump device output #" << outputInd << "_case_" << numberOfTestCase << " to " << blobFileName << std::endl; - dumpTensor(tensorEntry.second, blobFileName); + dumpTensor(tensor, blobFileName); ++outputInd; } @@ -2048,13 +2177,14 @@ static int 
runSingleImageTest() { } } else { size_t outputInd = 0; - for (const auto& tensorEntry : outputTensors) { + for (const auto& out : compiledModel.outputs()) { + const auto& tensor = outputTensors.at(out.get_any_name()); std::ostringstream ostr; ostr << netFileName << "_ref_out_" << outputInd << "_case_" << numberOfTestCase << ".blob"; const auto blobFileName = ostr.str(); std::cout << "Dump reference output #" << outputInd << " to " << blobFileName << std::endl; - dumpTensor(tensorEntry.second, blobFileName); + dumpTensor(tensor, blobFileName); ++outputInd; } diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp index d0452c9da1b638..82a8795698bb36 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp @@ -42,6 +42,7 @@ typedef std::tuple< std::vector, std::string, ov::pass::low_precision::LayerTransformation::Params, + bool, // use precision transparent operations RecurrentCellTransformationParam >RecurrentCellTransformationParams; diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp index e94663bf2b8596..692a00877c3368 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp @@ -21,14 +21,16 @@ std::string RecurrentCellTransformation::getTestCaseName(testing::TestParamInfo< std::string targetDevice; RecurrentCellTransformationParam param; 
ov::pass::low_precision::LayerTransformation::Params params; - std::tie(netPrecision, activationsShape, weightsShape, targetDevice, params, param) = obj.param; + bool addPrecisionTransparentOperations; + std::tie(netPrecision, activationsShape, weightsShape, targetDevice, params, addPrecisionTransparentOperations, param) = obj.param; std::ostringstream result; result << get_test_case_name_by_params(netPrecision, activationsShape[0], targetDevice, params) << "FQ_X_" << param.fakeQuantize_X << "_" << "DQ_X_" << param.dequantization_X << "_" << "FQ_W_" << param.fakeQuantize_W << "_" << - "DQ_W_" << param.dequantization_W; + "DQ_W_" << param.dequantization_W << "_" << + "PTO" << addPrecisionTransparentOperations; return result.str(); } @@ -37,9 +39,10 @@ void RecurrentCellTransformation::SetUp() { std::vector activations_shapes; std::vector weights_shapes; RecurrentCellTransformationParam param; + bool addPrecisionTransparentOperations; ov::pass::low_precision::LayerTransformation::Params params; - std::tie(precision, activations_shapes, weights_shapes, targetDevice, params, param) = this->GetParam(); + std::tie(precision, activations_shapes, weights_shapes, targetDevice, params, addPrecisionTransparentOperations, param) = this->GetParam(); init_input_shapes(activations_shapes); @@ -64,13 +67,14 @@ void RecurrentCellTransformation::SetUp() { param.dequantization_H, param.dequantization_W, param.dequantization_R - }); + }, + addPrecisionTransparentOperations); } void RecurrentCellTransformation::run() { LayerTransformation::run(); - const auto params = std::get<5>(GetParam()); + const auto params = std::get<6>(GetParam()); const auto actualPrecision = get_runtime_precision_by_type(params.layerName); auto expectedPrecision = params.expectedKernelType; if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ov::element::f16) { diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp 
b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp new file mode 100644 index 00000000000000..4384fecd089ea6 --- /dev/null +++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "low_precision/layer_transformation.hpp" +#include "ov_lpt_models/common/dequantization_operations.hpp" + +namespace ov { +namespace builder { +namespace subgraph { + +class BroadcastFunction { +public: + static std::shared_ptr get( + const bool v1, + const ov::PartialShape& inputShape, + const ov::element::Type precisionBeforeDequantization, + const ov::builder::subgraph::DequantizationOperations& dequantizationBefore, + const Shape& tagetShape, + const Shape& axesMapping, + const ov::builder::subgraph::DequantizationOperations& dequantizationAfter); +}; + +} // namespace subgraph +} // namespace builder +} // namespace ov diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp index da98410c55d13c..57ffdedc4c0eb6 100644 --- a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp +++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -25,7 +25,8 @@ class RecurrentCellFunction { const RNNType type, const std::vector& fqOnDatas, const std::vector& converts, - const std::vector& dequantizations); + const std::vector& dequantizations, + const bool addPrecisionTransparentOperations = false); }; std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptr input, @@ -33,7 +34,8 @@ std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptr +std::shared_ptr make_broadcast(const 
std::shared_ptr& parent, const Shape& tagetShape, const Shape& axesMapping) { + return std::make_shared( + parent, + std::make_shared(ov::element::i32, Shape{ tagetShape.size() }, tagetShape), + std::make_shared(ov::element::i32, Shape{ axesMapping.size() }, axesMapping)); +} +} // namespace + +std::shared_ptr BroadcastFunction::get( + const bool v1, + const ov::PartialShape& inputShape, + const ov::element::Type precisionBeforeDequantization, + const ov::builder::subgraph::DequantizationOperations& dequantizationBefore, + const Shape& tagetShape, + const Shape& axesMapping, + const ov::builder::subgraph::DequantizationOperations& dequantizationAfter) { + const auto input = std::make_shared(precisionBeforeDequantization, inputShape); + std::shared_ptr parent = input; + + if (!dequantizationBefore.empty()) { + parent = makeDequantization(parent, dequantizationBefore); + } + + parent = v1 ? + make_broadcast(parent, tagetShape, axesMapping) : + make_broadcast(parent, tagetShape, axesMapping); + parent->set_friendly_name("broadcast"); + + if (!dequantizationAfter.empty()) { + parent = makeDequantization(parent, dequantizationAfter); + } + + const std::shared_ptr result = std::make_shared(parent); + + const std::shared_ptr function = std::make_shared( + ov::ResultVector{ result }, + std::vector> { input }, + "BroadcastTransformation"); + return function; +} + +} // namespace subgraph +} // namespace builder +} // namespace ov diff --git a/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp b/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp index 7be3fca1217403..7a3537c91f3824 100644 --- a/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -30,7 +30,8 @@ std::shared_ptr RecurrentCellFunction::get( const RNNType type, const std::vector& 
fqOnDatas, const std::vector& converts, - const std::vector& dequantizations) { + const std::vector& dequantizations, + const bool addPrecisionTransparentOperations) { auto X = std::make_shared(inputPrecision, inputActivationsShapes[0]); X->set_friendly_name("X"); std::shared_ptr parent_X = makeQuantizationAndDequantization(X, @@ -46,7 +47,8 @@ std::shared_ptr RecurrentCellFunction::get( H->get_friendly_name(), fqOnDatas[1], converts[1], - dequantizations[1]); + dequantizations[1], + addPrecisionTransparentOperations); auto C = std::make_shared(inputPrecision, inputActivationsShapes[2]); C->set_friendly_name("C"); @@ -58,7 +60,8 @@ std::shared_ptr RecurrentCellFunction::get( W->get_friendly_name(), fqOnDatas[2], converts[2], - dequantizations[2]); + dequantizations[2], + addPrecisionTransparentOperations); auto R = ov::opset1::Constant::create(fqOnDatas[2].empty() ? ov::element::i8 : inputPrecision, inputWeightsShapes[1], {1}); @@ -127,12 +130,20 @@ std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptr parent; - if (fqOnData.empty()) { - parent = input; - } else { - std::shared_ptr fakeQuantize1 = makeFakeQuantizeTypeRelaxed(input, inputPrecision, fqOnData); + const DequantizationOperations& dequantization, + const bool addPrecisionTransparentOperations) { + std::shared_ptr parent = input; + if (addPrecisionTransparentOperations) { + auto shape = input->get_output_shape(0); + std::swap(shape[shape.size() - 2], shape[shape.size() - 1]); + parent = std::make_shared( + parent, + std::make_shared(element::u32, Shape({ shape.size() }), shape), + true); + } + + if (!fqOnData.empty()) { + std::shared_ptr fakeQuantize1 = makeFakeQuantizeTypeRelaxed(parent, inputPrecision, fqOnData); fakeQuantize1->set_friendly_name("fakeQuantize_" + friendly_name); parent = fakeQuantize1; } @@ -142,6 +153,15 @@ std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptrget_output_shape(0); + parent = std::make_shared( + parent, + std::make_shared(element::u32, 
Shape({ shape.size() }), shape), + true); + } + return parent; } diff --git a/tests/constraints.txt b/tests/constraints.txt index c0ab1a660164f4..16bffdf16967db 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -18,7 +18,7 @@ opencv-python>=4.5 paddlepaddle==2.6.1 protobuf>=3.18.1,<4.0.0 py>=1.9.0 -pytest>=5.0,<7.5 +pytest>=5.0,<8.4 pytest-dependency==0.5.1 pytest-html==4.1.1 pytest-timeout==2.2.0 @@ -27,6 +27,7 @@ jaxlib<=0.4.14 kornia==0.7.0 networkx<=3.3 keras>=2.0.0,<3.0.0 +timm==1.0.7 --extra-index-url https://download.pytorch.org/whl/cpu -torch>=1.13,<2.3 \ No newline at end of file +torch>=1.13,<2.4 \ No newline at end of file diff --git a/tests/e2e_tests/requirements.txt b/tests/e2e_tests/requirements.txt index 2c37134327f7cc..2d380c682819aa 100644 --- a/tests/e2e_tests/requirements.txt +++ b/tests/e2e_tests/requirements.txt @@ -20,7 +20,7 @@ scikit-image>=0.17.2 tabulate==0.9.0 pytest>=5.0,<=7.0.1; python_version < '3.10' -pytest==7.2.0; python_version >= '3.10' +pytest==8.3.1; python_version >= '3.10' pytest-cov==2.11.1 # pytest-html==1.19.0 pytest-html diff --git a/tests/layer_tests/pytorch_tests/test_batch_norm.py b/tests/layer_tests/pytorch_tests/test_batch_norm.py index 577a036af70240..8e72ae33eaa15e 100644 --- a/tests/layer_tests/pytorch_tests/test_batch_norm.py +++ b/tests/layer_tests/pytorch_tests/test_batch_norm.py @@ -60,5 +60,7 @@ def forward(self, x): @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_batch_norm(self, weights, bias, eps, train, running_stats, ie_device, precision, ir_version, kwargs_to_prepare_input): + if running_stats and self.use_torch_export(): + pytest.skip("running_mean not supported by torch.export") self._test(*self.create_model(weights, bias, eps, train, running_stats), ie_device, precision, ir_version, kwargs_to_prepare_input=kwargs_to_prepare_input, dynamic_shapes=False, use_mo_convert=False) diff --git a/tests/layer_tests/pytorch_tests/test_full.py 
b/tests/layer_tests/pytorch_tests/test_full.py index 20a70367e047f5..6ef8ca25a692a0 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -93,7 +93,7 @@ def test_full(self, shape, value, ie_device, precision, ir_version): @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export @@ -104,7 +104,7 @@ def test_full_dtype(self, shape, value, dtype, with_names, ie_device, precision, @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly def test_full_out(self, shape, value, dtype, with_names, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, use_out=True, with_names=with_names), ie_device, precision, @@ -496,7 +496,7 @@ def test_zeros_ones(self, op_type, shape, ie_device, precision, ir_version): @pytest.mark.parametrize("shape", [(1, 1), (1, 2), (1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5, 6)]) @pytest.mark.parametrize("op_type", ["aten::zeros", "aten::ones"]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export @@ -508,7 +508,7 @@ def 
test_zeros_ones_with_dtype(self, op_type, shape, dtype, with_names, ie_devic @pytest.mark.parametrize("shape", [(1, 1), (1, 2), (1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5, 6)]) @pytest.mark.parametrize("op_type", ["aten::zeros", "aten::ones"]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly def test_zeros_ones_with_out(self, op_type, shape, dtype, with_names, ie_device, precision, ir_version): self._test(*self.create_model(op_type, dtype=dtype, with_out=True, with_names=with_names), ie_device, precision, diff --git a/tests/model_hub_tests/models_hub_common/utils.py b/tests/model_hub_tests/models_hub_common/utils.py index 6dac33640162de..068826669fab5b 100644 --- a/tests/model_hub_tests/models_hub_common/utils.py +++ b/tests/model_hub_tests/models_hub_common/utils.py @@ -27,7 +27,6 @@ def get_models_list(file_name: str): model_name, model_link = model_info.split(',') elif len(model_info.split(',')) == 4: model_name, model_link, mark, reason = model_info.split(',') - assert mark in ["skip", "xfail"], "Incorrect failure mark for model info {}".format(model_info) models.append((model_name, model_link, mark, reason)) return models diff --git a/tests/model_hub_tests/pytorch/hf_transformers_models b/tests/model_hub_tests/pytorch/hf_transformers_models index 5da9db39095810..f79f32b6d93ee8 100644 --- a/tests/model_hub_tests/pytorch/hf_transformers_models +++ b/tests/model_hub_tests/pytorch/hf_transformers_models @@ -4,7 +4,7 @@ abeja/gpt-neox-japanese-2.7b,gpt_neox_japanese acl-submission-anonym/EAM-spectral,examuse,skip,Load problem adalbertojunior/modular-test,modular,skip,Load problem adept/persimmon-8b-base,persimmon -aerner/lm-v2,open-llama,xfail,Example input problem +aerner/lm-v2,open-llama afonsosamarques/ardt-vanilla-combo_train_hopper_v2-2508_1336-33,decision_transformer,xfail,Tracing 
problem aihijo/gec-zh-gector-bert-large,gector,skip,Load problem albert-base-v2,albert @@ -170,6 +170,7 @@ huggingface/time-series-transformer-tourism-monthly,time_series_transformer,skip HuggingFaceM4/tiny-random-idefics,idefics,xfail,Unsupported op aten::any aten::einsum prim::TupleConstruct prim::TupleUnpack HuggingFaceM4/tiny-random-vllama-clip,vllama,skip,Load problem HuggingFaceM4/tiny-random-vopt-clip,vopt,skip,Load problem +HuggingFaceH4/zephyr-7b-beta,mistral HuiHuang/gpt3-damo-base-zh,gpt3,skip,Load problem hustvl/yolos-tiny,yolos iakarshu/tilt_base,tilt_base_configuration,skip,Load problem @@ -184,7 +185,7 @@ jaketae/fastspeech2-ljspeech,fastspeech2,skip,Load problem jambran/depression-classification,DepressionDetection,skip,Load problem Jellywibble/dalio-reward-charlie-v1,reward-model,skip,Load problem JonasGeiping/crammed-bert-legacy,crammedBERT,skip,Load problem -jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2,xfail,Unsupported op aten::index_put_ prim::TupleConstruct +jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2 Joqsan/test-my-fnet,my_fnet,skip,Load problem jozhang97/deta-swin-large,deta,skip,Load problem jploski/retnet-mini-shakespeare,retnet,skip,Load problem @@ -257,7 +258,6 @@ microsoft/xclip-base-patch32,xclip microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet miguelvictor/python-fromzero-lstmlm,lstmlm,skip,Load problem mingzi151/test-hf-wav2vec2bert,wav2vec2bert,skip,Load problem -mistralai/Mistral-7B-v0.1,mistral MIT/ast-finetuned-audioset-10-10-0.4593,audio-spectrogram-transformer Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime,luke mlml-chip/thyme2_colon_e2e,cnlpt,skip,Load problem diff --git a/tests/model_hub_tests/pytorch/test_timm.py b/tests/model_hub_tests/pytorch/test_timm.py index 1e168de83a50d5..78bd632179be6f 100644 --- a/tests/model_hub_tests/pytorch/test_timm.py +++ b/tests/model_hub_tests/pytorch/test_timm.py @@ -8,7 +8,7 @@ import torch from models_hub_common.utils import get_models_list -from 
torch_utils import TestTorchConvertModel, process_pytest_marks +from torch_utils import TestTorchConvertModel def filter_timm(timm_list: list) -> list: @@ -42,10 +42,6 @@ def filter_timm(timm_list: list) -> list: return sorted([v[1] for v in unique_models.values()]) -def get_all_models() -> list: - return process_pytest_marks(os.path.join(os.path.dirname(__file__), "timm_models")) - - # To make tests reproducible we seed the random generator torch.manual_seed(0) @@ -82,10 +78,16 @@ def test_convert_model_precommit(self, name, ie_device): self.run(name, None, ie_device) @pytest.mark.nightly - @pytest.mark.parametrize("name", get_all_models()) + @pytest.mark.parametrize("name,link,mark,reason", get_models_list(os.path.join(os.path.dirname(__file__), "timm_models"))) @pytest.mark.parametrize("mode", ["trace", "export"]) - def test_convert_model_all_models(self, mode, name, ie_device): + def test_convert_model_all_models(self, mode, name, link, mark, reason, ie_device): self.mode = mode + assert mark is None or mark in [ + 'skip', 'xfail', 'xfail_trace', 'xfail_export'], f"Incorrect test case for {name}" + if mark == 'skip': + pytest.skip(reason) + elif mark in ['xfail', f'xfail_{mode}']: + pytest.xfail(reason) self.run(name, None, ie_device) @pytest.mark.nightly diff --git a/tests/model_hub_tests/pytorch/test_torchvision_models.py b/tests/model_hub_tests/pytorch/test_torchvision_models.py index 9aeabbbe09b032..31aeaedb2366d4 100644 --- a/tests/model_hub_tests/pytorch/test_torchvision_models.py +++ b/tests/model_hub_tests/pytorch/test_torchvision_models.py @@ -7,8 +7,9 @@ import pytest import torch import torchvision.transforms.functional as F +from models_hub_common.utils import get_models_list -from torch_utils import process_pytest_marks, TestTorchConvertModel +from torch_utils import TestTorchConvertModel def get_all_models() -> list: @@ -103,10 +104,15 @@ def test_convert_model_precommit_export(self, model_name, ie_device): self.mode = "export" 
self.run(model_name, None, ie_device) - @pytest.mark.parametrize("name", - process_pytest_marks(os.path.join(os.path.dirname(__file__), "torchvision_models"))) + @pytest.mark.parametrize("name,link,mark,reason", get_models_list(os.path.join(os.path.dirname(__file__), "torchvision_models"))) @pytest.mark.parametrize("mode", ["trace", "export"]) @pytest.mark.nightly - def test_convert_model_all_models(self, mode, name, ie_device): + def test_convert_model_all_models(self, mode, name, link, mark, reason, ie_device): self.mode = mode + assert mark is None or mark in [ + 'skip', 'xfail', 'xfail_trace', 'xfail_export'], f"Incorrect test case for {name}" + if mark == 'skip': + pytest.skip(reason) + elif mark in ['xfail', f'xfail_{mode}']: + pytest.xfail(reason) self.run(name, None, ie_device) diff --git a/tests/model_hub_tests/pytorch/timm_models b/tests/model_hub_tests/pytorch/timm_models index 9087edc24ffe2c..6aa64a90c19071 100644 --- a/tests/model_hub_tests/pytorch/timm_models +++ b/tests/model_hub_tests/pytorch/timm_models @@ -13,7 +13,7 @@ cait_s36_384.fb_dist_in1k,None cait_xs24_384.fb_dist_in1k,None cait_xxs24_224.fb_dist_in1k,None cait_xxs36_224.fb_dist_in1k,None -coat_tiny.in1k,None +coat_tiny.in1k,None,xfail_export,Requested None inlined input coatnet_bn_0_rw_224.sw_in1k,None coatnet_nano_rw_224.sw_in1k,None coatnet_rmlp_1_rw2_224.sw_in12k,None @@ -23,7 +23,7 @@ convformer_b36.sail_in1k,None convformer_m36.sail_in1k,None convformer_s18.sail_in1k,None convformer_s36.sail_in1k,None -convit_base.fb_in1k,None,xfail,Trace failed +convit_base.fb_in1k,None,xfail_trace,Trace failed convmixer_1024_20_ks9_p14.in1k,None convmixer_1536_20.in1k,None convnext_atto_ols.a2_in1k,None @@ -102,10 +102,10 @@ edgenext_xx_small.in1k,None efficientformer_l1.snap_dist_in1k,None efficientformer_l3.snap_dist_in1k,None efficientformer_l7.snap_dist_in1k,None -efficientformerv2_l.snap_dist_in1k,None -efficientformerv2_s0.snap_dist_in1k,None -efficientformerv2_s1.snap_dist_in1k,None 
-efficientformerv2_s2.snap_dist_in1k,None +efficientformerv2_l.snap_dist_in1k,None,xfail_export,Requested None inlined input +efficientformerv2_s0.snap_dist_in1k,None,xfail_export,Requested None inlined input +efficientformerv2_s1.snap_dist_in1k,None,xfail_export,Requested None inlined input +efficientformerv2_s2.snap_dist_in1k,None,xfail_export,Requested None inlined input efficientnet_b0.ra_in1k,None efficientnet_b1.ft_in1k,None efficientnet_b1_pruned.in1k,None @@ -144,6 +144,9 @@ eva02_base_patch14_224.mim_in22k,None eva02_base_patch16_clip_224.merged2b,None eva02_large_patch14_clip_224.merged2b,None fastvit_ma36.apple_dist_in1k,None +fastvit_mci0.apple_mclip,None +fastvit_mci1.apple_mclip,None +fastvit_mci2.apple_mclip,None,xfail_trace,Accuracy validation failed fastvit_s12.apple_dist_in1k,None fastvit_sa12.apple_dist_in1k,None fastvit_sa24.apple_dist_in1k,None @@ -167,7 +170,7 @@ gcresnext50ts.ch_in1k,None gcvit_base.in1k,None gernet_s.idstcv_in1k,None ghostnet_100.in1k,None -ghostnetv2_100.in1k,None +ghostnetv2_100.in1k,None,xfail_export,Requested None inlined input gmixer_24_224.ra3_in1k,None gmlp_s16_224.ra3_in1k,None halo2botnet50ts_256.a1h_in1k,None @@ -188,16 +191,18 @@ hgnetv2_b3.ssld_stage1_in22k_in1k,None hgnetv2_b4.ssld_stage1_in22k_in1k,None hgnetv2_b5.ssld_stage1_in22k_in1k,None hgnetv2_b6.ssld_stage1_in22k_in1k,None -hrnet_w18_small.gluon_in1k,None -hrnet_w18_small_v2.gluon_in1k,None -hrnet_w18_ssld.paddle_in1k,None -hrnet_w30.ms_in1k,None -hrnet_w32.ms_in1k,None -hrnet_w40.ms_in1k,None -hrnet_w44.ms_in1k,None -hrnet_w48.ms_in1k,None -hrnet_w48_ssld.paddle_in1k,None -hrnet_w64.ms_in1k,None +hiera_base_224.mae,None +hiera_base_plus_224.mae,None +hrnet_w18_small.gluon_in1k,None,xfail_export,Requested None inlined input +hrnet_w18_small_v2.gluon_in1k,None,xfail_export,Requested None inlined input +hrnet_w18_ssld.paddle_in1k,None,xfail_export,Requested None inlined input +hrnet_w30.ms_in1k,None,xfail_export,Requested None inlined input 
+hrnet_w32.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w40.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w44.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w48.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w48_ssld.paddle_in1k,None,xfail_export,Requested None inlined input +hrnet_w64.ms_in1k,None,xfail_export,Requested None inlined input inception_next_base.sail_in1k,None inception_resnet_v2.tf_ens_adv_in1k,None inception_v3.gluon_in1k,None @@ -236,6 +241,9 @@ mobilenetv2_110d.ra_in1k,None mobilenetv2_120d.ra_in1k,None mobilenetv3_rw.rmsp_in1k,None mobilenetv3_small_050.lamb_in1k,None +mobilenetv4_conv_blur_medium.e500_r224_in1k,None +mobilenetv4_conv_small.e1200_r224_in1k,None +mobilenetv4_hybrid_medium.e500_r224_in1k,None mobileone_s0.apple_in1k,None mobileone_s1.apple_in1k,None mobileone_s2.apple_in1k,None @@ -324,6 +332,8 @@ resnest50d.in1k,None resnest50d_1s4x24d.in1k,None resnest50d_4s2x40d.in1k,None resnet101.a1_in1k,None +resnet101_clip.openai,None +resnet101_clip_gap.openai,None resnet101c.gluon_in1k,None resnet101d.gluon_in1k,None resnet101s.gluon_in1k,None @@ -344,10 +354,18 @@ resnet33ts.ra2_in1k,None resnet34.a1_in1k,None resnet34d.ra2_in1k,None resnet50.a1_in1k,None +resnet50_clip.openai,None +resnet50_clip_gap.openai,None resnet50_gn.a1h_in1k,None resnet50c.gluon_in1k,None resnet50d.a1_in1k,None resnet50s.gluon_in1k,None +resnet50x16_clip.openai,None +resnet50x16_clip_gap.openai,None +resnet50x4_clip.openai,None +resnet50x4_clip_gap.openai,None +resnet50x64_clip.openai,None +resnet50x64_clip_gap.openai,None resnet51q.ra2_in1k,None resnet61q.ra2_in1k,None resnetaa101d.sw_in12k,None @@ -388,7 +406,7 @@ selecsls60.in1k,None selecsls60b.in1k,None semnasnet_075.rmsp_in1k,None senet154.gluon_in1k,None -sequencer2d_s.in1k,None +sequencer2d_s.in1k,None,xfail_export,No conversion rule found for operations aten.mkldnn_rnn_layer.default seresnet152d.ra2_in1k,None seresnet33ts.ra2_in1k,None 
seresnet50.a1_in1k,None @@ -453,7 +471,7 @@ tinynet_b.in1k,None tinynet_c.in1k,None tinynet_d.in1k,None tinynet_e.in1k,None -tnt_s_patch16_224,None +tnt_s_patch16_224,None,xfail_export,Requested None inlined input tresnet_m.miil_in1k,None tresnet_v2_l.miil_in21k,None twins_pcpvt_base.in1k,None @@ -467,25 +485,39 @@ vgg16_bn.tv_in1k,None vgg19.tv_in1k,None vgg19_bn.tv_in1k,None visformer_tiny.in1k,None +vit_base_mci_224.apple_mclip,None vit_base_patch14_dinov2.lvd142m,None vit_base_patch14_reg4_dinov2.lvd142m,None vit_base_patch16_224.augreg2_in21k_ft_in1k,None vit_base_patch16_224_miil.in21k,None vit_base_patch16_clip_224.datacompxl,None vit_base_patch16_clip_quickgelu_224.metaclip_2pt5b,None +vit_base_patch16_rope_reg1_gap_256.sbb_in1k,None,xfail,Argument shapes are inconsistent vit_base_patch16_rpn_224.sw_in1k,None vit_base_patch16_siglip_224.webli,None +vit_base_patch16_siglip_gap_224.webli,None vit_base_patch32_224.augreg_in1k,None vit_base_patch32_clip_224.datacompxl,None vit_base_patch32_clip_quickgelu_224.metaclip_2pt5b,None vit_base_patch8_224.augreg2_in21k_ft_in1k,None vit_base_r50_s16_224.orig_in21k,None +vit_betwixt_patch16_reg1_gap_256.sbb_in1k,None +vit_betwixt_patch16_reg4_gap_256.sbb_in12k,None +vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1k,None,xfail,Argument shapes are inconsistent +vit_betwixt_patch32_clip_224.tinyclip_laion400m,None vit_huge_patch14_224.mae,None vit_huge_patch14_gap_224.in1k_ijepa,None vit_large_patch14_clip_224.datacompxl,None vit_large_patch14_clip_quickgelu_224.dfn2b,None vit_large_r50_s32_224.augreg_in21k,None +vit_little_patch16_reg1_gap_256.sbb_in12k,None +vit_little_patch16_reg4_gap_256.sbb_in1k,None vit_medium_patch16_gap_240.sw_in12k,None +vit_medium_patch16_reg1_gap_256.sbb_in1k,None +vit_medium_patch16_reg4_gap_256.sbb_in12k,None +vit_mediumd_patch16_reg4_gap_256.sbb_in12k,None +vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1k,None,xfail,Argument shapes are inconsistent +vit_pwee_patch16_reg1_gap_256.sbb_in1k,None 
vit_relpos_base_patch16_224.sw_in1k,None vit_relpos_base_patch16_clsgap_224.sw_in1k,None vit_relpos_base_patch32_plus_rpn_256.sw_in1k,None @@ -493,13 +525,18 @@ vit_relpos_medium_patch16_cls_224.sw_in1k,None vit_relpos_medium_patch16_rpn_224.sw_in1k,None vit_small_r26_s32_224.augreg_in21k,None vit_so400m_patch14_siglip_224.webli,None +vit_so400m_patch14_siglip_gap_224.pali_mix,None,skip,Access to model google/paligemma-3b-mix-224-jax is restricted vit_srelpos_small_patch16_224.sw_in1k,None vit_tiny_r_s16_p8_224.augreg_in21k,None -volo_d1_224.sail_in1k,None -volo_d2_224.sail_in1k,None -volo_d3_224.sail_in1k,None -volo_d4_224.sail_in1k,None -volo_d5_224.sail_in1k,None +vit_wee_patch16_reg1_gap_256.sbb_in1k,None +vit_xsmall_patch16_clip_224.tinyclip_yfcc15m,None +vitamin_base_224.datacomp1b_clip,None,xfail,RuntimeError Error in loading state_dict for VisionTransformer +vitamin_large2_224.datacomp1b_clip,None +volo_d1_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d2_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d3_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d4_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d5_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension wide_resnet101_2.tv2_in1k,None wide_resnet50_2.racm_in1k,None xception41.tf_in1k,None diff --git a/tests/model_hub_tests/pytorch/torchvision_models b/tests/model_hub_tests/pytorch/torchvision_models index 35e6805bd18152..a045925ed54f4a 100644 --- a/tests/model_hub_tests/pytorch/torchvision_models +++ b/tests/model_hub_tests/pytorch/torchvision_models @@ -3,9 +3,9 @@ convnext_base,none convnext_large,none convnext_small,none convnext_tiny,none -deeplabv3_mobilenet_v3_large,none -deeplabv3_resnet101,none -deeplabv3_resnet50,none +deeplabv3_mobilenet_v3_large,none,xfail_export,Requested None inlined input +deeplabv3_resnet101,none,xfail_export,Requested None inlined input 
+deeplabv3_resnet50,none,xfail_export,Requested None inlined input densenet121,none densenet161,none densenet169,none @@ -21,11 +21,11 @@ efficientnet_b7,none efficientnet_v2_l,none efficientnet_v2_m,none efficientnet_v2_s,none -fcn_resnet101,none -fcn_resnet50,none +fcn_resnet101,none,xfail_export,Requested None inlined input +fcn_resnet50,none,xfail_export,Requested None inlined input googlenet,none inception_v3,none -lraspp_mobilenet_v3_large,none +lraspp_mobilenet_v3_large,none,xfail_export,Requested None inlined input maxvit_t,none mc3_18,none mnasnet0_5,none @@ -39,8 +39,8 @@ mvit_v1_b,none mvit_v2_s,none r2plus1d_18,none r3d_18,none -raft_large,none -raft_small,none +raft_large,none,xfail_export,Mutating module attribute corr_pyramid during export +raft_small,none,xfail_export,Mutating module attribute corr_pyramid during export regnet_x_16gf,none regnet_x_1_6gf,none regnet_x_32gf,none diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index 9c6d95a2a668d6..cec8d2be4baf81 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit 9c6d95a2a668d6ae41aebda42b15608db7dd3fa0 +Subproject commit cec8d2be4baf81c191091abd83c59507fc12d2e8 diff --git a/tools/constraints.txt b/tools/constraints.txt index 21961ea88f9e14..258ed7a8b3208c 100644 --- a/tools/constraints.txt +++ b/tools/constraints.txt @@ -7,7 +7,7 @@ mxnet~=1.2.0; sys_platform == 'win32' mxnet>=1.7.0.post2,<=1.9.1; sys_platform != 'win32' onnx>=1.8.1,<=1.15.0 networkx<=3.1.0 -pytest>=5.0,<7.3 +pytest>=5.0,<8.4 protobuf>=3.18.1,<4.0.0 defusedxml>=0.7.1 requests>=2.25.1