Merge branch 'master' into github_actions/local_cache_impl
mryzhov authored Mar 21, 2024
2 parents 6d0d8ee + a8c224f commit 6303e65
Showing 123 changed files with 2,913 additions and 697 deletions.
90 changes: 59 additions & 31 deletions .github/scripts/collect_github_metrics.py
@@ -6,37 +6,49 @@
import logging
import psycopg2
import dateutil
import argparse

def init_logger():
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
logging.basicConfig(level=LOGLEVEL,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
datefmt='%m-%d-%Y %H:%M:%S')

def make_parser():
parser = argparse.ArgumentParser()
parser.add_argument('-r', '--repository-name', type=str, required=True,
help='Repository name in OWNER/REPOSITORY format')
parser.add_argument('--run-id', type=str, required=True,
help='Workflow Run ID')

return parser

def create_db_tables(conn, cur):
cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_runs_test(
id SERIAL,
run_id BIGINT PRIMARY KEY,
cur.execute('''CREATE TABLE IF NOT EXISTS workflow_runs(
id SERIAL PRIMARY KEY,
run_id BIGINT,
html_url TEXT,
name VARCHAR(255),
run_started_at TIMESTAMP,
created_at TIMESTAMP,
updated_at TIMESTAMP,
triggering_actor_login VARCHAR(255),
conclusion VARCHAR(25),
run_number INT,
event VARCHAR(50),
run_attempt INT,
repository_full_name VARCHAR(255),
head_repository_full_name VARCHAR(255),
head_branch VARCHAR(255),
status VARCHAR(25),
display_title TEXT,
path TEXT
path TEXT,
total_duration_seconds INT
);
''')
cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_jobs_test(
id SERIAL,
job_id BIGINT PRIMARY KEY,
parent_run_id BIGINT REFERENCES github_workflow_runs_test(run_id),
cur.execute('''CREATE TABLE IF NOT EXISTS workflow_jobs(
id SERIAL PRIMARY KEY,
job_id BIGINT,
parent_run_id BIGINT,
html_url TEXT,
name VARCHAR(255),
created_at TIMESTAMP,
@@ -47,12 +59,14 @@ def create_db_tables(conn, cur):
runner_name VARCHAR(255),
status VARCHAR(25),
conclusion VARCHAR(25),
head_branch VARCHAR(255)
head_branch VARCHAR(255),
run_attempt INT,
workflow_name TEXT
);
''')
cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_steps_test(
cur.execute('''CREATE TABLE IF NOT EXISTS workflow_steps(
id SERIAL PRIMARY KEY,
parent_job_id BIGINT REFERENCES github_workflow_jobs_test(job_id),
parent_job_id BIGINT,
name VARCHAR(255),
conclusion VARCHAR(25),
number INT,
@@ -65,20 +79,16 @@

def main():
init_logger()

parser = make_parser()
args = parser.parse_args()
logger = logging.getLogger(__name__)

github_token = os.environ.get('GITHUB_TOKEN')
if not github_token:
raise ValueError('GITHUB_TOKEN environment variable is not set!')

run_id = os.environ.get('RUN_ID')
if not run_id:
raise ValueError('RUN_ID environment variable is not set!')

repo_name = os.environ.get('GITHUB_REPOSITORY')
if not repo_name:
raise ValueError('GITHUB_REPOSITORY environment variable is not set!')
run_id = args.run_id
repo_name = args.repository_name


# this should be specified in runner's env
@@ -102,18 +112,30 @@ def main():
repo = g.get_repo(repo_name)

run = repo.get_workflow_run(int(run_id))

workflow_data_query = f'''INSERT INTO github_workflow_runs_test(
if run.status != 'completed':
logger.error('Run %s is not completed! Only completed runs should be in the database', run_id)
raise SystemExit(1)

# We rely on the following assumptions:
# - The workflow run is completed. When run.status != 'completed' we should not add it to the database:
#   theoretically, the second attempt can be triggered right after the completion of the first one,
#   or while the runner which executes this script is deploying.
#
# - A job's queued duration equals "job.started_at - job.created_at" if started_at > created_at.
#   Otherwise, the job should not be added to the database.
total_duration_seconds = round(run.timing().run_duration_ms / 1000)
workflow_data_query = f'''INSERT INTO workflow_runs(
run_id, html_url, name,
run_started_at, triggering_actor_login, conclusion,
run_number, event, run_attempt, repository_full_name,
head_branch, display_title, path)
run_started_at, created_at, updated_at, triggering_actor_login, conclusion,
event, run_attempt, repository_full_name,
head_branch, display_title, path, total_duration_seconds)
VALUES(
'{run_id}', '{run.html_url}', '{run.name}', '{run.run_started_at}',
'{run.created_at}', '{run.updated_at}',
'{run.raw_data['triggering_actor']['login']}',
'{run.conclusion}', '{run.run_number}', '{run.event}',
'{run.conclusion}', '{run.event}',
'{run.run_attempt}', '{run.raw_data['repository']['full_name']}',
'{run.head_branch}', '{run.display_title}', '{run.path}'
'{run.head_branch}', '{run.display_title}', '{run.path}', '{total_duration_seconds}'
);
'''

@@ -126,6 +148,10 @@ def main():
duration_seconds = 0

job_created_at_date = dateutil.parser.parse(job.raw_data['created_at'])
if job_created_at_date > job.started_at:
logger.warning('Skipping job %s of run %s - most likely a stub \
job created after workflow restart', job.name, run_id)
continue

queued_duration_timedelta = job.started_at - job_created_at_date
queued_duration_seconds = round(queued_duration_timedelta.total_seconds())
@@ -134,17 +160,19 @@ def main():
duration_seconds = round(duration_timedelta.total_seconds())

job_data_query = f'''
INSERT INTO github_workflow_jobs_test(
INSERT INTO workflow_jobs(
job_id, parent_run_id, html_url, name,
created_at, started_at, completed_at,
queued_duration_seconds, duration_seconds,
runner_name, status, conclusion, head_branch)
runner_name, status, conclusion, head_branch,
run_attempt, workflow_name
)
VALUES(
'{job_id}', '{run_id}', '{job.html_url}', '{job.name}',
'{job.raw_data['created_at']}', '{job.started_at}', '{job.completed_at}',
'{queued_duration_seconds}', '{duration_seconds}',
'{job.raw_data['runner_name']}', '{job.status}', '{job.conclusion}',
'{job.raw_data['head_branch']}'
'{job.raw_data['head_branch']}', '{job.raw_data['run_attempt']}', '{job.raw_data['workflow_name']}'
);
'''
logger.debug('Job query: %s', job_data_query)
@@ -154,7 +182,7 @@ def main():
duration_seconds = round(duration_seconds_timedelta.total_seconds())

step_data_query = f'''
INSERT INTO github_workflow_steps_test(
INSERT INTO workflow_steps(
parent_job_id, name, conclusion,
number, started_at, completed_at,
duration_seconds)
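
As a side illustration (not part of the diff above), the same job insert can be expressed as a parameterized query, letting psycopg2 quote the values instead of building the SQL with an f-string. This sketch assumes it runs inside the same loop, with cur, run_id, job, job_id and the duration variables already defined as in the script:

job_data_query = '''
    INSERT INTO workflow_jobs(
        job_id, parent_run_id, html_url, name,
        created_at, started_at, completed_at,
        queued_duration_seconds, duration_seconds,
        runner_name, status, conclusion, head_branch,
        run_attempt, workflow_name)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
'''
# psycopg2 substitutes and escapes each %s placeholder from the tuple below.
cur.execute(job_data_query, (
    job_id, run_id, job.html_url, job.name,
    job.raw_data['created_at'], job.started_at, job.completed_at,
    queued_duration_seconds, duration_seconds,
    job.raw_data['runner_name'], job.status, job.conclusion,
    job.raw_data['head_branch'], job.raw_data['run_attempt'],
    job.raw_data['workflow_name'],
))
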
6 changes: 1 addition & 5 deletions .github/workflows/code_snippets.yml
@@ -39,9 +39,5 @@ jobs:
- name: CMake configure
run: cmake -DCMAKE_BUILD_TYPE=Release -DTHREADING=SEQ -B build

- name: Get number of CPU cores
uses: SimenB/github-actions-cpu-cores@v2
id: cpu-cores

- name: Build snippets
run: cmake --build build --target openvino_docs_snippets --parallel ${{ steps.cpu-cores.outputs.count }}
run: cmake --build build --target openvino_docs_snippets --parallel
6 changes: 0 additions & 6 deletions .github/workflows/coverage.yml
@@ -54,10 +54,6 @@ jobs:
python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_tf2.txt
python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_dev.txt
- name: Get number of CPU cores
uses: SimenB/github-actions-cpu-cores@v2
id: cpu-cores

- name: Build OpenVINO with CMake
uses: ashutoshvarma/action-cmake-build@master
with:
@@ -81,7 +77,6 @@ jobs:
-DCMAKE_CXX_LINKER_LAUNCHER=ccache
-DENABLE_SYSTEM_SNAPPY=ON
build-type: Release
parallel: ${{ steps.cpu-cores.outputs.count }}

- name: Install wheel packages
run: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install_pkg -P '${{ github.workspace }}/build/cmake_install.cmake'
@@ -129,7 +124,6 @@ jobs:
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DCMAKE_C_LINKER_LAUNCHER=ccache
-DCMAKE_CXX_LINKER_LAUNCHER=ccache
parallel: ${{ steps.cpu-cores.outputs.count }}
- name: Print info
7 changes: 6 additions & 1 deletion .github/workflows/send_workflows_to_opentelemetry.yml
@@ -49,6 +49,8 @@ jobs:
- name: Install deps
run: |
pip3 install -r .github/scripts/requirements.txt
# dependency review action has these as an exception
# yet it still complains, so install them here
pip3 install PyGithub==2.2.0 psycopg2-binary==2.9.9
- name: Send metrics to SQL database
@@ -58,6 +60,9 @@
PGHOST: ${{ secrets.METRICS_DATABASE_HOST }}
PGUSER: ${{ secrets.METRICS_DATABASE_USERNAME }}
PGPASSWORD: ${{ secrets.METRICS_DATABASE_PASSWORD }}
PGDATABASE: ${{ secrets.METRICS_DATABASE_NAME }}
PGPORT: 5432
run: |
python3 .github/scripts/collect_github_metrics.py
python3 .github/scripts/collect_github_metrics.py \
--run-id ${{ github.event.workflow_run.id }} \
--repository-name ${GITHUB_REPOSITORY}
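
The PGHOST/PGPORT/PGUSER/PGPASSWORD/PGDATABASE variables exported above are the standard libpq connection variables, so the script does not have to receive credentials on its command line. Assuming the script keeps opening its connection through psycopg2, the relevant part reduces to roughly this sketch:

import psycopg2

# With the PG* variables set by the workflow step, libpq supplies the host, port,
# user, password and database; an empty DSN means "use environment defaults".
conn = psycopg2.connect("")
cur = conn.cursor()
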
@@ -23,44 +23,43 @@ Optimize Inference
optimizations that can be done independently. Inference
speed depends on latency and throughput.


Runtime optimization, or deployment optimization, focuses on tuning inference parameters and execution means (e.g., the optimum number of requests executed simultaneously). Unlike model-level optimizations, they are highly specific to the hardware and case they are used for, and often come at a cost.
``ov::hint::inference_precision`` is a "typical runtime configuration" which trades accuracy for performance, allowing ``fp16/bf16`` execution for the layers that remain in ``fp32`` after quantization of the original ``fp32`` model.

Therefore, optimization should start with defining the use case. For example, if it is about processing millions of samples by overnight jobs in data centers, throughput could be prioritized over latency. On the other hand, real-time usages would likely trade off throughput to deliver the results at minimal latency. A combined scenario is also possible, targeting the highest possible throughput, while maintaining a specific latency threshold.

It is also important to understand how the full-stack application would use the inference component "end-to-end." For example, to know what stages need to be orchestrated to save workload devoted to fetching and preparing input data.

For more information on this topic, see the following articles:

* :doc:`Supported Devices <../../about-openvino/compatibility-and-support/supported-devices>`
* :doc:`Inference Devices and Modes <inference-devices-and-modes>`
* :ref:`Inputs Pre-processing with the OpenVINO <inputs_pre_processing>`
* :ref:`Async API <async_api>`
* :ref:`The 'get_tensor' Idiom <tensor_idiom>`
* For variably-sized inputs, consider :doc:`dynamic shapes <dynamic-shapes>`


See the :doc:`latency <optimize-inference/optimizing-latency>` and :doc:`throughput <optimize-inference/optimizing-throughput>` optimization guides for **use-case-specific optimizations**.

Writing Performance-Portable Inference Applications
###################################################

Although inference performed in OpenVINO Runtime can be configured with a multitude of low-level performance settings, it is not recommended in most cases. Firstly, achieving the best performance with such adjustments requires deep understanding of device architecture and the inference engine.


Secondly, such optimization may not translate well to other device-model combinations. In other words, one set of execution parameters is likely to result in different performance when used under different conditions. For example:

* both the CPU and GPU support the notion of :doc:`streams <./optimize-inference/optimizing-throughput/advanced_throughput_options>`, yet they deduce their optimal number very differently.
* Even among devices of the same type, different execution configurations can be considered optimal, as in the case of instruction sets or the number of cores for the CPU and the batch size for the GPU.
* Different models have different optimal parameter configurations, considering factors such as compute vs memory-bandwidth, inference precision, and possible model quantization.
* Execution "scheduling" impacts performance strongly and is highly device-specific, for example, GPU-oriented optimizations like batching, combining multiple inputs to achieve the optimal throughput, :doc:`do not always map well to the CPU <optimize-inference/optimizing-low-level-implementation>`.


To make the configuration process much easier and its performance optimization more portable, the option of :doc:`Performance Hints <optimize-inference/high-level-performance-hints>` has been introduced. It comprises two high-level "presets" focused on either **latency** or **throughput** and, essentially, makes execution specifics irrelevant.

The Performance Hints functionality makes configuration transparent to the application, for example, anticipates the need for explicit (application-side) batching or streams, and facilitates parallel processing of separate infer requests for different input sources.

Runtime, or deployment, optimization focuses on tuning inference and execution parameters. Unlike
model-level optimization, it is highly specific to the hardware you use and the goal you want
to achieve. You need to plan whether to prioritize accuracy or performance,
:doc:`throughput <optimize-inference/optimizing-throughput>` or :doc:`latency <optimize-inference/optimizing-latency>`,
or aim at the golden mean. You should also predict how scalable your application needs to be
and how exactly it is going to work with the inference component. This way, you will be able
to achieve the best results for your product.

.. note::

For more information on this topic, see the following articles:

* :doc:`Inference Devices and Modes <inference-devices-and-modes>`
* :ref:`Inputs Pre-processing with the OpenVINO <inputs_pre_processing>`
* :ref:`Async API <async_api>`
* :ref:`The 'get_tensor' Idiom <tensor_idiom>`
* For variably-sized inputs, consider :doc:`dynamic shapes <dynamic-shapes>`

Performance-Portable Inference
################################

To make configuration easier and performance optimization more portable, OpenVINO offers the
:doc:`Performance Hints <optimize-inference/high-level-performance-hints>` feature. It comprises
two high-level “presets” focused on latency **(default)** or throughput.
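
For illustration, a hint is passed as a single property at model compilation time. The model path and device name below are placeholders; this is a minimal Python sketch:

.. code-block:: python

   import openvino as ov

   core = ov.Core()
   model = core.read_model("model.xml")

   # Let the device configure itself for low latency; pass "THROUGHPUT"
   # instead to optimize for overall samples processed per second.
   compiled_model = core.compile_model(model, "CPU", {"PERFORMANCE_HINT": "LATENCY"})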

Although inference with OpenVINO Runtime can be configured with a multitude
of low-level performance settings, it is not recommended, as:

* It requires deep understanding of device architecture and the inference engine.
* It may not translate well to other device-model combinations. For example:

* CPU and GPU deduce their optimal number of streams differently.
* Different devices of the same type favor different execution configurations.
* Different models favor different parameter configurations (e.g., compute vs memory-bandwidth,
inference precision, and possible model quantization).
* Execution “scheduling” impacts performance strongly and is highly device-specific. GPU-oriented
optimizations :doc:`do not always map well to the CPU <optimize-inference/optimizing-low-level-implementation>`.

Additional Resources
####################
@@ -21,9 +21,9 @@ The hints, in contrast, respect the actual model, so the parameters for optimal
Performance Hints: Latency and Throughput
#########################################

As discussed in the :doc:`Optimization Guide <../optimize-inference>` there are a few different metrics associated with inference speed. Throughput and latency are some of the most widely used metrics that measure the overall performance of an application.
As discussed in the :doc:`Optimization Guide <../optimize-inference>` there are a few different metrics associated with inference speed. Latency and throughput are some of the most widely used metrics that measure the overall performance of an application.

Therefore, in order to ease the configuration of the device, OpenVINO offers two dedicated hints, namely ``ov::hint::PerformanceMode::THROUGHPUT`` and ``ov::hint::PerformanceMode::LATENCY``.
Therefore, in order to ease the configuration of the device, OpenVINO offers two dedicated hints, namely ``ov::hint::PerformanceMode::LATENCY`` **(default)** and ``ov::hint::PerformanceMode::THROUGHPUT``.
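
For example, with the throughput hint the compiled model also reports how many parallel infer requests the device considers optimal for it (a sketch; the model path is a placeholder):

.. code-block:: python

   import openvino as ov

   core = ov.Core()
   compiled_model = core.compile_model(
       core.read_model("model.xml"), "CPU", {"PERFORMANCE_HINT": "THROUGHPUT"}
   )

   # The device derives a matching number of streams and infer requests from the hint.
   n_requests = compiled_model.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")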

For more information on conducting performance measurements with the ``benchmark_app``, refer to the last section in this document.

6 changes: 6 additions & 0 deletions src/bindings/js/node/include/compiled_model.hpp
@@ -72,5 +72,11 @@ class CompiledModelWrap : public Napi::ObjectWrap<CompiledModelWrap> {
Napi::Value export_model(const Napi::CallbackInfo& info);

private:
/** @brief Gets node of a compiled model specified in CallbackInfo. */
Napi::Value get_node(const Napi::CallbackInfo& info,
const ov::Output<const ov::Node>& (ov::CompiledModel::*func)() const,
const ov::Output<const ov::Node>& (ov::CompiledModel::*func_tname)(const std::string&)const,
const ov::Output<const ov::Node>& (ov::CompiledModel::*func_idx)(size_t) const);

ov::CompiledModel _compiled_model;
};
