diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml
index 131abb59b5e252..dd1078bb0d4353 100644
--- a/.github/actions/openvino_provider/action.yml
+++ b/.github/actions/openvino_provider/action.yml
@@ -9,6 +9,11 @@ inputs:
     description: 'Branch of OpenVINO to take the revision from if no specific hash was provided. Taken from github context by default'
     required: false
+  event_name:
+    description: 'Event name from which artifacts were generated. "push" by default; override it with "pull_request"
+                  if revision/branch is from a PR'
+    default: "push"
+    required: false
   ov_artifact_name:
     description: "Name under which to upload provided OpenVINO build artifacts, set automatically by default"
     required: false
@@ -107,7 +112,7 @@ runs:
       with:
         platform: ${{ inputs.platform }}_${{ inputs.arch }}
         storage_root: ${{ inputs.commit_share_path }}
-        event_name: "commit"
+        event_name: ${{ inputs.event_name }}
         trigger_repo_sha: ${{ env.OV_REVISION }}
         branch_name: ${{ inputs.branch_name }}
         to_restore: ${{ inputs.commit_packages_to_provide }}
@@ -137,7 +142,7 @@ runs:
        artifacts_path=${{ steps.openvino_commit_download.outputs.artifacts_path }}
        cd $artifacts_path
        version=$(yq eval '.components.dldt.custom_params.wheel_product_version' manifest.yml)
-       wheel_path=${{ inputs.install_dir && '$artifacts_path/tools' || './tools' }}
+       wheel_path=${{ inputs.install_dir && '$artifacts_path/wheels' || './wheels' }}
        default_find_links_cmd="--find-links=$wheel_path"
        find_links_cmd=$([[ -n "$PIP_FIND_LINKS" ]] && echo "" || echo "$default_find_links_cmd")
        echo "ov_version=$version" >> $GITHUB_OUTPUT
diff --git a/.github/actions/restore_artifacts/restore_artifacts.py b/.github/actions/restore_artifacts/restore_artifacts.py
index d79388a27f5616..007bc56aafe9b2 100644
--- a/.github/actions/restore_artifacts/restore_artifacts.py
+++ b/.github/actions/restore_artifacts/restore_artifacts.py
@@ -33,10 +33,10 @@ def include_filter(include_list: set | list):
     """
     Returns input for shutil.copytree ignore - to copy only files from include list
     """
-    def _filter(_, files: list):
+    def _filter(root, files: list):
         if not include_list:
             return []
-        return [f for f in files if f not in include_list]
+        return [f for f in files if f not in include_list and Path(root).name not in include_list]
     return _filter
diff --git a/.github/labeler.yml b/.github/labeler.yml
index 310eb2d88a66f8..5421d669ed224f 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -150,6 +150,7 @@
 - 'tests/layer_tests/tensorflow_tests/**/*'
 - 'tests/layer_tests/tensorflow2_keras_tests/**/*'
 - 'tests/layer_tests/jax_tests/**/*'
+- 'tests/requirements_tensorflow'
 - any: ['tests/model_hub_tests/**',
         '!tests/model_hub_tests/pytorch/**/*',
         '!tests/model_hub_tests/jax/**/*']
@@ -165,6 +166,7 @@
 - 'src/bindings/python/src/openvino/frontend/pytorch/**/*'
 - 'tests/layer_tests/py_frontend_tests/test_torch_decoder.py'
 - 'tests/layer_tests/py_frontend_tests/test_torch_frontend.py'
+- 'tests/requirements_pytorch'
 - any: ['tests/model_hub_tests/**',
         '!tests/model_hub_tests/tensorflow/**/*',
         '!tests/model_hub_tests/jax/**/*']
diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml
index 0cb7cfb93e16f0..d58e879c736610 100644
--- a/.github/workflows/job_build_linux.yml
+++ b/.github/workflows/job_build_linux.yml
@@ -82,6 +82,7 @@ jobs:
       INSTALL_DIR: /__w/openvino/openvino/openvino_install
       INSTALL_DIR_JS: /__w/openvino/openvino/openvino_install/js
       INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install
+      INSTALL_WHEELS_DIR: /__w/openvino/openvino/install/wheels
       DEVELOPER_PACKAGE_DIR: /__w/openvino/openvino/developer_package_install
       BUILD_DIR: /__w/openvino/openvino/openvino_build
       SCCACHE_AZURE_KEY_PREFIX: ${{ inputs.os }}_${{ inputs.arch }}_Release
@@ -174,25 +175,24 @@ jobs:
     - name: Cmake install - OpenVINO
       run: |
-        cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake
-        cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake
-        cmake -DCMAKE_INSTALL_PREFIX=${DEVELOPER_PACKAGE_DIR} -DCOMPONENT=developer_package -P ${BUILD_DIR}/cmake_install.cmake
-        cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake
-
-    - name: Pack Artifacts
-      run: |
-        pushd ${INSTALL_DIR}
-          tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz *
-        popd
-
-        pushd ${DEVELOPER_PACKAGE_DIR}
-          tar -I pigz -cvf ${BUILD_DIR}/openvino_developer_package.tar.gz *
-        popd
-
-        pushd ${INSTALL_TEST_DIR}
-          tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz *
-        popd
-
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${INSTALL_DIR}
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${INSTALL_WHEELS_DIR} --component python_wheels
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${INSTALL_TEST_DIR} --component tests
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${DEVELOPER_PACKAGE_DIR} --component developer_package
+      working-directory: ${{ env.BUILD_DIR }}
+
+    - name: Pack openvino_package
+      run: tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz *
+      working-directory: ${{ env.INSTALL_DIR }}
+
+    - name: Pack openvino_developer_package
+      run: tar -I pigz -cvf ${BUILD_DIR}/openvino_developer_package.tar.gz *
+      working-directory: ${{ env.DEVELOPER_PACKAGE_DIR }}
+
+    - name: Pack openvino_tests
+      run: tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz *
+      working-directory: ${{ env.INSTALL_TEST_DIR }}
+
     - name: Build Debian packages
       if: ${{ inputs.build-debian-packages }}
       run: |
@@ -205,6 +205,7 @@ jobs:
         -UTBB* \
         -DENABLE_SYSTEM_TBB=ON \
         -DENABLE_PYTHON_PACKAGING=ON \
+        -DENABLE_WHEEL=OFF \
         -DENABLE_TESTS=OFF \
         -DPython3_EXECUTABLE=$python_exec \
         -DCPACK_GENERATOR=DEB \
@@ -217,6 +218,7 @@ jobs:
        cmake \
          -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" \
          -DOPENVINO_EXTRA_MODULES="${OPENVINO_CONTRIB_REPO}/modules/java_api;${OPENVINO_CONTRIB_REPO}/modules/custom_operations" \
+         -DENABLE_WHEEL=OFF \
          -S ${OPENVINO_REPO} \
          -B ${BUILD_DIR}
        cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }}
@@ -224,9 +226,12 @@ jobs:
     - name: CMake configure, build and install - OpenVINO JS API
       if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }}
       run: |
-        cmake -UTBB* -DCPACK_GENERATOR=NPM -DENABLE_SYSTEM_TBB=OFF -S ${OPENVINO_REPO} -B ${BUILD_DIR}
+        cmake -UTBB* -S ${OPENVINO_REPO} -B ${BUILD_DIR} \
+          -DCPACK_GENERATOR=NPM \
+          -DENABLE_SYSTEM_TBB=OFF \
+          -DENABLE_WHEEL=OFF
         cmake --build ${BUILD_DIR} --parallel
-        cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR_JS} -P ${BUILD_DIR}/cmake_install.cmake
+        cmake --install ${BUILD_DIR} --prefix ${INSTALL_DIR_JS}

     - name: Build RPM packages
       if: ${{ inputs.build-rpm-packages }}
       run: |
@@ -235,6 +240,7 @@ jobs:
         -DCPACK_GENERATOR=RPM \
         -DENABLE_SYSTEM_TBB=ON \
         -DENABLE_PYTHON_PACKAGING=ON \
+        -DENABLE_WHEEL=OFF \
         -DENABLE_TESTS=OFF \
         ${BUILD_DIR}
        cmake --build ${BUILD_DIR} --parallel --target package --verbose
@@ -257,7 +263,15 @@ jobs:
         name: openvino_package
         path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz
         if-no-files-found: 'error'
-
+
+    - name: Upload openvino wheels
+      if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }}
+      uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
+      with:
+        name: openvino_wheels
+        path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl
+        if-no-files-found: 'error'
+
     - name: Upload openvino js package
       if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }}
       uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
@@ -317,5 +331,8 @@ jobs:
          ${{ env.BUILD_DIR }}/openvino_tests.tar.gz
          ${{ env.BUILD_DIR }}/deb
          ${{ env.MANIFEST_PATH }}
+         ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.BUILD_DIR) || '' }}
        storage_dir: ${{ env.PRODUCT_TYPE }}
        storage_root: ${{ env.ARTIFACTS_SHARE }}
+      env:
+        STORE_WHEELS: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }}
diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml
index 66301cee1f1046..c8e249513a08f0 100644
--- a/.github/workflows/job_build_windows.yml
+++ b/.github/workflows/job_build_windows.yml
@@ -44,6 +44,7 @@ jobs:
       INSTALL_DIR: "${{ github.workspace }}\\openvino_install"
       INSTALL_DIR_JS: "${{ github.workspace }}\\openvino_install\\js"
       INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install"
+      INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels"
       BUILD_DIR: "${{ github.workspace }}\\openvino_build"
       ARTIFACTS_SHARE: "C:\\mount\\build-artifacts"
       MANIFEST_PATH: "${{ github.workspace }}\\manifest.yml"
@@ -179,13 +180,14 @@
     - name: Cmake install - OpenVINO
       run: |
-        cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake
-        cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake
-        cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_DIR }}
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels
+        cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_TEST_DIR }} --component tests
+      working-directory: ${{ env.BUILD_DIR }}

     - name: Pack Artifacts
       run: |
-        $file=Get-ChildItem -Path "${{ env.INSTALL_DIR }}"
+        $file = Get-ChildItem -Path "${{ env.INSTALL_DIR }}"
         $compress = @{
           Path = $file
           CompressionLevel = "Optimal"
@@ -204,9 +206,12 @@
     - name: CMake configure, build and install - OpenVINO JS API
       if: ${{ fromJSON(inputs.affected-components).JS_API }}
       run: |
-        cmake -DCPACK_GENERATOR=NPM -DENABLE_SYSTEM_TBB=OFF -UTBB* -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }}
+        cmake -UTBB* -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} `
+          -DCPACK_GENERATOR=NPM `
+          -DENABLE_SYSTEM_TBB=OFF `
+          -DENABLE_WHEEL=OFF
         cmake --build ${{ env.BUILD_DIR }} --parallel
-        cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR_JS }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake
+        cmake --install ${{ env.BUILD_DIR }} --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_DIR_JS }}

 #
 # Upload build artifacts and logs
 #
@@ -218,6 +223,13 @@
         name: openvino_package
         path: ${{ env.BUILD_DIR }}/openvino_package.zip
         if-no-files-found: 'error'
+
+    - name: Upload openvino wheels
+      uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
+      with:
+        name: openvino_wheels
+        path: ${{ env.BUILD_DIR }}/wheels/*.whl
+        if-no-files-found: 'error'

     - name: Upload openvino tests package
       uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
@@ -242,6 +254,7 @@
       artifacts: |
         ${{ env.BUILD_DIR }}/openvino_package.zip
         ${{ env.BUILD_DIR }}/openvino_tests.zip
+        ${{ env.INSTALL_WHEELS_DIR }}/wheels
         ${{ env.MANIFEST_PATH }}
       storage_dir: ${{ env.PRODUCT_TYPE }}
       storage_root: ${{ env.ARTIFACTS_SHARE }}
diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml
index 8f9292d35fb803..2fed97a78e9c07 100644
--- a/.github/workflows/job_jax_models_tests.yml
+++ b/.github/workflows/job_jax_models_tests.yml
@@ -33,25 +33,22 @@ jobs:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tokenizers extension
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tokenizers_wheel
-          path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
+          pattern: openvino_[tokenizers_wheel|wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
           echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV"
           echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV"
           echo "MODEL_HUB_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/model_hub_tests" >> "$GITHUB_ENV"
-
-      - name: Extract OpenVINO packages
+
+      - name: Extract OpenVINO packages and tests
         run: |
-          pushd ${INSTALL_DIR}
-            tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR}
-          popd
-
-          pushd ${INSTALL_TEST_DIR}
           tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
-          popd
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -90,8 +82,9 @@
         run: |
           # To enable pytest parallel features
           python3 -m pip install pytest-xdist[psutil]
-          python3 -m pip install ${INSTALL_DIR}/tools/openvino-*
-          python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*
+          python3 -m pip install ./openvino-*
+          python3 -m pip install ./openvino_tokenizers-*
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install JAX tests requirements for precommit
         run: |
diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml
index ffc4da8ef87b54..0eda00f7afb937 100644
--- a/.github/workflows/job_onnx_models_tests.yml
+++ b/.github/workflows/job_onnx_models_tests.yml
@@ -28,6 +28,7 @@ jobs:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       INSTALL_DIR: ${{ github.workspace }}/install
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
       ONNX_MODELS_PATH: ${{ github.workspace }}/onnx_test_models
       # instead of using static MODELS_SHARE_PATH
@@ -37,18 +38,20 @@
       ONNX_MODEL_ZOO_SHA: "5faef4c33eba0395177850e1e31c4a6a9e634c82"
     if: ${{ github.event_name != 'merge_group' }}
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tests)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
-
+          pattern: openvino_[wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true
+
       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -59,15 +62,10 @@
           echo $MODELS_SHARE_PATH
           echo "LOGS_FOLDER=$GITHUB_WORKSPACE/onnx_models_tests_logs" >> "$GITHUB_ENV"

-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO packages and tests
         run: |
-          pushd ${INSTALL_DIR}
-            tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR}
-          popd
-
-          pushd ${INSTALL_TEST_DIR}
           tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
-          popd
+        working-directory: ${{ env.INSTALL_DIR }}

       # Issue 148922
       # Can be a possible root cause for the bug
@@ -87,15 +85,14 @@
     - name: Install OpenVINO Python wheels
       run: |
        # Install the core OV wheel
-        python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl
+        python3 -m pip install ./openvino-*.whl

        extras_to_install="onnx"

        # Find and install OV dev wheel
-        pushd ${INSTALL_DIR}/tools
-          ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl')
-          python3 -m pip install $ov_dev_wheel_name[$extras_to_install]
-        popd
+        ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl')
+        python3 -m pip install $ov_dev_wheel_name[$extras_to_install]
+      working-directory: ${{ env.INSTALL_WHEELS_DIR }}

     - name: Install Python tests dependencies
       run: |
diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml
index e6ba39fdb3bfe3..8db2ebf86dca91 100644
--- a/.github/workflows/job_python_unit_tests.yml
+++ b/.github/workflows/job_python_unit_tests.yml
@@ -38,22 +38,25 @@ jobs:
     env:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       OPENVINO_REPO: ${{ github.workspace }}/openvino
-      INSTALL_DIR: ${{ github.workspace }}/install
+      INSTALL_DIR: ${{ github.workspace }}/install
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
       LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
+          pattern: openvino_[wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -63,14 +66,10 @@
           echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV"
           echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV"

-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO artifacts
         run: |
-          pushd $INSTALL_DIR
-            tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR
-          popd
-          pushd $INSTALL_TEST_DIR
-            tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR
-          popd
+          tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -95,15 +94,14 @@
     - name: Install OpenVINO Python wheels
       run: |
        # Install the core OV wheel
-        python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl
+        python3 -m pip install ./openvino-*.whl

        extras_to_install="caffe,kaldi,onnx,tensorflow2,pytorch"

        # Find and install OV dev wheel
-        pushd ${INSTALL_DIR}/tools
-          ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl')
-          python3 -m pip install $ov_dev_wheel_name[$extras_to_install]
-        popd
+        ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl')
+        python3 -m pip install $ov_dev_wheel_name[$extras_to_install]
+      working-directory: ${{ env.INSTALL_WHEELS_DIR }}

     - name: Install Python API tests dependencies
       run: |
diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index cf8514a7cd6707..c4f0d1efb37c75 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -40,19 +40,22 @@ jobs:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
+          pattern: openvino_[wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -62,26 +65,18 @@
           echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV"
           echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV"
           echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV"
-
-      - name: Extract OpenVINO packages (Linux, macOS)
+
+      - name: Extract OpenVINO artifacts (Linux, macOS)
         if: runner.os != 'Windows'
         run: |
-          pushd $INSTALL_DIR
-            tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR
-          popd
-          pushd $INSTALL_TEST_DIR
-            tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR
-          popd
+          tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
+        working-directory: ${{ env.INSTALL_DIR }}

-      - name: Extract OpenVINO packages (Windows)
+      - name: Extract OpenVINO artifacts (Windows)
         if: runner.os == 'Windows'
         run: |
-          pushd ${{ env.INSTALL_DIR }}
-            Expand-Archive openvino_package.zip -DestinationPath ${{ env.INSTALL_DIR }}
-          popd
-          pushd ${{ env.INSTALL_TEST_DIR }}
           Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }}
-          popd
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -103,14 +98,16 @@
         if: runner.os != 'Windows'
         run: |
           # Install the core OV wheel
-          python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl
+          python3 -m pip install ./openvino-*.whl
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install OpenVINO Python wheels (Windows)
         if: runner.os == 'Windows'
         run: |
           # Find and install the core OV wheel
-          $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }}\tools -Filter openvino-*.whl | % { $_.FullName }
+          $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName }
           python3 -m pip install "$ovCoreWheelPath"
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install Pytorch Layer tests dependencies
         run: |
diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml
index bcbcad872b42e1..ce40dd7f0618ce 100644
--- a/.github/workflows/job_pytorch_models_tests.yml
+++ b/.github/workflows/job_pytorch_models_tests.yml
@@ -33,6 +33,7 @@ jobs:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests
     steps:
       - name: Check sudo
@@ -48,23 +49,19 @@
            sudo sh -c "echo 'Acquire::Retries \"10\";' >> /etc/apt/apt.conf.d/80-retries"
          fi

-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tokenizers extension
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: openvino_tokenizers_wheel
-          path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
+          pattern: openvino_[wheels|tokenizers_wheel]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -74,15 +71,10 @@
           echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV"
           echo "MODEL_HUB_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/model_hub_tests" >> "$GITHUB_ENV"

-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO artifacts
         run: |
-          pushd ${INSTALL_DIR}
-            tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR}
-          popd
-
-          pushd ${INSTALL_TEST_DIR}
           tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
-          popd
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -109,8 +101,9 @@
       - name: Install OpenVINO Python wheels
         run: |
           # To enable pytest parallel features
-          python3 -m pip install ${INSTALL_DIR}/tools/openvino-*
-          python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*
+          python3 -m pip install ./openvino-*
+          python3 -m pip install ./openvino_tokenizers-*
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install PyTorch tests requirements for precommit
         if: ${{ inputs.model_scope == 'precommit' }}
diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml
index cc314ee93ee876..12c63644d7b586 100644
--- a/.github/workflows/job_samples_tests.yml
+++ b/.github/workflows/job_samples_tests.yml
@@ -31,20 +31,23 @@ jobs:
       DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       BUILD_DIR: ${{ github.workspace }}/build
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[package|tests]*
           path: ${{ env.INSTALL_DIR }}
+          merge-multiple: true

-      - name: Download OpenVINO tests package
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
-
+          pattern: openvino_[wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true
+
       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -52,14 +55,11 @@
           echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV"
           echo "BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV"

-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO packages, wheels and tests
         run: |
-          pushd $INSTALL_DIR
-            tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR
-          popd
-          pushd $INSTALL_TEST_DIR
-            tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR
-          popd
+          tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR}
+          tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Install OpenVINO dependencies (mac)
         if: runner.os == 'macOS'
@@ -122,7 +122,7 @@
          export SHARE=$INSTALL_TEST_DIR/smoke_tests/samples_smoke_tests_data

          # Install Python benchmark_app by installing openvino-*.whl
-         python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt $INSTALL_DIR/tools/openvino-*.whl
+         python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt $INSTALL_WHEELS_DIR/openvino-*.whl

          export LD_LIBRARY_PATH=${IE_APP_PATH}:$LD_LIBRARY_PATH
          source ${INSTALL_DIR}/setupvars.sh
diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml
index cc9e2781923c33..3ad19d3301945f 100644
--- a/.github/workflows/job_tensorflow_layer_tests.yml
+++ b/.github/workflows/job_tensorflow_layer_tests.yml
@@ -40,25 +40,22 @@ jobs:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
-
-      - name: Download OpenVINO tokenizers extension
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tokenizers_wheel
-          path: ${{ env.INSTALL_DIR }}
+          pattern: openvino_[wheels|openvino_tokenizers]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -69,25 +66,17 @@
           echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV"
           echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV"

-      - name: Extract OpenVINO packages (Linux, macOS)
+      - name: Extract OpenVINO artifacts (Linux and macOS)
         if: runner.os != 'Windows'
         run: |
-          pushd $INSTALL_DIR
-            tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR
-          popd
-          pushd $INSTALL_TEST_DIR
-            tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR
-          popd
-
-      - name: Extract OpenVINO packages (Windows)
+          tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
+        working-directory: ${{ env.INSTALL_DIR }}
+
+      - name: Extract OpenVINO artifacts (Windows)
         if: runner.os == 'Windows'
         run: |
-          pushd ${{ env.INSTALL_DIR }}
-            Expand-Archive openvino_package.zip -DestinationPath ${{ env.INSTALL_DIR }}
-          popd
-          pushd ${{ env.INSTALL_TEST_DIR }}
           Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }}
-          popd
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -109,21 +98,23 @@
         if: runner.os != 'Windows'
         run: |
           # Install the core OV wheel
-          python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl
+          python3 -m pip install ./openvino-*.whl

           # Install the core OV Tokenizers wheel
-          python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*.whl
+          python3 -m pip install ./openvino_tokenizers-*.whl
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install OpenVINO Python wheels (Windows)
         if: runner.os == 'Windows'
         run: |
           # Find and install the core OV wheel
-          $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }}\tools -Filter openvino-*.whl | % { $_.FullName }
+          $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName }
           python3 -m pip install "$ovCoreWheelPath"

           # Find and install the core OV Tokenizers wheel
-          $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }} -Filter openvino_tokenizers-*.whl | % { $_.FullName }
+          $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino_tokenizers-*.whl | % { $_.FullName }
           python3 -m pip install "$ovCoreWheelPath"
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install Python Layer tests dependencies
         run: |
diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml
index b2cdf5a6336db0..76ee01cc76c3ef 100644
--- a/.github/workflows/job_tensorflow_models_tests.yml
+++ b/.github/workflows/job_tensorflow_models_tests.yml
@@ -33,27 +33,24 @@ jobs:
       OPENVINO_REPO: ${{ github.workspace }}/openvino
       INSTALL_DIR: ${{ github.workspace }}/install
       INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests
       NUMBER_OF_REPLICAS: 2
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tokenizers extension
+          merge-multiple: true
+
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tokenizers_wheel
-          path: ${{ env.INSTALL_DIR }}
-
-      - name: Download OpenVINO tests package
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
-
+          pattern: openvino_[wheels|tokenizers_wheel]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true
+
       # Needed as ${{ github.workspace }} is not working correctly when using Docker
       - name: Setup Variables
         run: |
@@ -66,15 +63,10 @@
           echo "HF_HUB_CACHE=/mount/testdata$((GITHUB_RUN_NUMBER % NUMBER_OF_REPLICAS))/hugging_face" >> "$GITHUB_ENV"
           echo $HF_HUB_CACHE

-      - name: Extract OpenVINO packages
+      - name: Extract OpenVINO artifacts (Linux and macOS)
         run: |
-          pushd ${INSTALL_DIR}
-            tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR}
-          popd
-
-          pushd ${INSTALL_TEST_DIR}
-            tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR}
-          popd
+          tar -I pigz -xf openvino_tests.tar.gz -C .
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -93,8 +85,9 @@
       - name: Install OpenVINO Python wheels
         run: |
-          python3 -m pip install ${INSTALL_DIR}/tools/openvino-*
-          python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*
+          python3 -m pip install ./openvino-*
+          python3 -m pip install ./openvino_tokenizers-*
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install TF Models tests requirements
         run: python3 -m pip install -r ${INSTALL_TEST_DIR}/requirements_tensorflow
diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml
index c01c2740201384..089b104d7af1d1 100644
--- a/.github/workflows/job_tokenizers.yml
+++ b/.github/workflows/job_tokenizers.yml
@@ -39,6 +39,7 @@ jobs:
     container: ${{ fromJSON(inputs.container) }}
     env:
       INSTALL_DIR: ${{ github.workspace }}/install
+      INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels
       OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}/openvino_tokenizers
       EXTENSION_BUILD_DIR: ${{ github.workspace }}/build
@@ -73,26 +74,13 @@
         path: ${{ env.OPENVINO_TOKENIZERS_REPO }}
         ref: ${{ env.TARGET_BRANCH }}

-    - name: Download OpenVINO package
+    - name: Download OpenVINO artifacts (wheels)
      uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
      with:
-        name: openvino_package
-        path: ${{ env.INSTALL_DIR }}
-
-    - name: Extract OpenVINO packages (Linux and macOS)
-      if: runner.os != 'Windows'
-      run: |
-        pushd ${INSTALL_DIR}
-          tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR}
-        popd
-
-    - name: Extract OpenVINO packages (Windows)
-      if: runner.os == 'Windows'
-      run: |
-        pushd ${{ env.INSTALL_DIR }}
-          Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}"
-        popd
-
+        pattern: openvino_[wheels]*
+        path: ${{ env.INSTALL_WHEELS_DIR }}
+        merge-multiple: true
+
 #
 # Dependencies
 #
@@ -101,17 +89,18 @@
       if: runner.os != 'Windows'
       run: |
        # Find and install wheel
-        pushd ${INSTALL_DIR}/tools
-          wheel_name=$(find . -name 'openvino-*.whl')
-          python3 -m pip install $wheel_name
-        popd
+        wheel_name=$(find . -name 'openvino-*.whl')
+        python3 -m pip install $wheel_name
+      working-directory: ${{ env.INSTALL_WHEELS_DIR }}
+
     - name: Install OpenVINO Python wheel (Windows)
       if: runner.os == 'Windows'
       run: |
        # Find and install wheel
-        $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\\tools" -Filter openvino-*.whl | % { $_.FullName }
+        $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName }
         python3 -m pip install "$ovCoreWheelPath"
+      working-directory: ${{ env.INSTALL_WHEELS_DIR }}

 #
 # Build
 #
@@ -119,16 +108,18 @@
     - name: Build tokenizers wheel (Linux and macOS)
       if: runner.os != 'Windows'
+      working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }}
       run: |
         # use OpenVINO wheel package only to build the extension
-        python -m pip wheel -v --no-deps --wheel-dir ${EXTENSION_BUILD_DIR} --find-links ${INSTALL_DIR}/tools ${OPENVINO_TOKENIZERS_REPO}
+        python -m pip wheel -v --no-deps --wheel-dir ${EXTENSION_BUILD_DIR} --find-links ${INSTALL_WHEELS_DIR} .
       env:
         CMAKE_BUILD_PARALLEL_LEVEL: '4'

     - name: Build tokenizers wheel (Windows)
       if: runner.os == 'Windows'
+      working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }}
       run: |
-        python3 -m pip wheel -v --no-deps --wheel-dir ${env:EXTENSION_BUILD_DIR} --find-links ${env:INSTALL_DIR}/tools ${env:OPENVINO_TOKENIZERS_REPO}
+        python3 -m pip wheel -v --no-deps --wheel-dir ${env:EXTENSION_BUILD_DIR} --find-links ${env:INSTALL_WHEELS_DIR} .
       env:
         CMAKE_BUILD_PARALLEL_LEVEL: '4'
diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml
index 32bdf5148ec540..a0871712285bf6 100644
--- a/.github/workflows/windows_vs2019_release.yml
+++ b/.github/workflows/windows_vs2019_release.yml
@@ -72,30 +72,30 @@ jobs:
       OPENVINO_REPO: "${{ github.workspace }}\\openvino"
       INSTALL_DIR: "${{ github.workspace }}\\install"
       INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests"
+      INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels"
       SAMPLES_INSTALL_DIR: "${{ github.workspace }}\\install\\samples"
       BUILD_DIR: "${{ github.workspace }}\\build"
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[package|tests]*
           path: ${{ env.INSTALL_DIR }}
+          merge-multiple: true

-      - name: Download OpenVINO tests package
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
+          pattern: openvino_[wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       - name: Extract OpenVINO packages
         run: |
-          pushd ${{ env.INSTALL_DIR }}
-            Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}"
-          popd
-          pushd ${{ env.INSTALL_TEST_DIR }}
-            Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}"
-          popd
+          Expand-Archive openvino_package.zip -DestinationPath .
+          Expand-Archive openvino_tests.zip -DestinationPath .
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -127,16 +127,17 @@
       - name: Samples tests
         run: |
           # Install Python benchmark_app by installing openvino-*.whl
-          $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino-*.whl | % { $_.FullName }
-          python3 -m pip install --ignore-installed PyYAML -r ${{ env.INSTALL_TEST_DIR }}/smoke_tests/requirements.txt "$ovCoreWheelPath"
-          . "${{ env.INSTALL_DIR }}/setupvars.ps1"
+          $ovCoreWheelPath=Get-ChildItem -Path ./wheels -Filter openvino-*.whl | % { $_.FullName }
+          python3 -m pip install --ignore-installed PyYAML -r ./tests/smoke_tests/requirements.txt "$ovCoreWheelPath"
+          . "./setupvars.ps1"
           $Env:PYTHONCOERCECLOCALE="warn"
-          python3 -bb -W error -X dev -X warn_default_encoding -m pytest ${{ env.INSTALL_TEST_DIR }}/smoke_tests --numprocesses auto
+          python3 -bb -W error -X dev -X warn_default_encoding -m pytest ./tests/smoke_tests --numprocesses auto
         env:
           IE_APP_PATH: ${{ env.INSTALL_DIR }}/samples_bin
           IE_APP_PYTHON_PATH: ${{ env.INSTALL_DIR }}/samples/python
           SHARE: ${{ env.INSTALL_TEST_DIR }}/smoke_tests/samples_smoke_tests_data
           WORKSPACE: ${{ env.INSTALL_DIR }}
+        working-directory: ${{ env.INSTALL_DIR }}

       # Test .bat scripts for samples building
       - name: Build cpp samples (bat)
@@ -237,30 +238,29 @@
       OPENVINO_REPO: "${{ github.workspace }}\\openvino"
       INSTALL_DIR: "${{ github.workspace }}\\install"
       INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests"
+      INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels"
       LAYER_TESTS_INSTALL_DIR: "${{ github.workspace }}\\install\\tests\\layer_tests"
       PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin"
     steps:
-      - name: Download OpenVINO package
+      - name: Download OpenVINO artifacts (tarballs)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_package
+          pattern: openvino_[tests]*
           path: ${{ env.INSTALL_DIR }}
+          merge-multiple: true

-      - name: Download OpenVINO tests package
+      - name: Download OpenVINO artifacts (wheels)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          name: openvino_tests
-          path: ${{ env.INSTALL_TEST_DIR }}
+          pattern: openvino_[wheels]*
+          path: ${{ env.INSTALL_WHEELS_DIR }}
+          merge-multiple: true

       - name: Extract OpenVINO packages
         run: |
-          pushd ${{ env.INSTALL_DIR }}
-            Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}"
-          popd
-          pushd ${{ env.INSTALL_TEST_DIR }}
-            Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}"
-          popd
+          Expand-Archive openvino_tests.zip -DestinationPath .
+        working-directory: ${{ env.INSTALL_DIR }}

       - name: Fetch setup_python action
         uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
@@ -281,12 +281,13 @@
       - name: Install OpenVINO Python wheels
         run: |
           # Find and install the core OV wheel
-          $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino-*.whl | % { $_.FullName }
+          $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName }
           python3 -m pip install "$ovCoreWheelPath"

           # Find and install the dev OV wheel
-          $ovDevWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino_dev*.whl | % { $_.FullName }
+          $ovDevWheelPath=Get-ChildItem -Path . -Filter openvino_dev*.whl | % { $_.FullName }
           python3 -m pip install "$ovDevWheelPath[caffe,kaldi,onnx,tensorflow2,pytorch]"
+        working-directory: ${{ env.INSTALL_WHEELS_DIR }}

       - name: Install Python API tests dependencies
         run: |
@@ -382,7 +383,6 @@
        if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test ||
            fromJSON(needs.smart_ci.outputs.affected_components).PDPD_FE.test
        run: |
-          . "${{ env.INSTALL_DIR }}/setupvars.ps1"
           python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml

       - name: OVC unit tests
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c68e3a611b39ba..e9e8d3724d9ac5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,11 @@ if(ENABLE_COVERAGE)
     include(cmake/coverage.cmake)
 endif()

+if(APPLE AND CMAKE_OSX_DEPLOYMENT_TARGET AND
+   CMAKE_OSX_DEPLOYMENT_TARGET VERSION_LESS 10.15)
+    message(FATAL_ERROR "OpenVINO requires MACOSX_DEPLOYMENT_TARGET at least 10.15, specified ${CMAKE_OSX_DEPLOYMENT_TARGET}")
+endif()
+
 # resolving dependencies for the project
 message (STATUS "CMAKE_VERSION ......................... " ${CMAKE_VERSION})
 message (STATUS "CMAKE_CROSSCOMPILING .................. " ${CMAKE_CROSSCOMPILING})
diff --git a/cmake/developer_package/packaging/archive.cmake b/cmake/developer_package/packaging/archive.cmake
index 6df5145ae2e488..5978ec73052970 100644
--- a/cmake/developer_package/packaging/archive.cmake
+++ b/cmake/developer_package/packaging/archive.cmake
@@ -30,8 +30,7 @@ macro(ov_archive_cpack_set_dirs)
     set(OV_CPACK_DOCDIR docs)
     set(OV_CPACK_LICENSESDIR licenses)
     set(OV_CPACK_SAMPLESDIR samples)
-    set(OV_CPACK_WHEELSDIR tools)
-    set(OV_CPACK_TOOLSDIR tools)
+    set(OV_CPACK_WHEELSDIR wheels)
     set(OV_CPACK_DEVREQDIR tools)
     set(OV_CPACK_PYTHONDIR python)

@@ -87,12 +86,11 @@ macro(ov_define_component_include_rules)
     unset(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL)
     unset(OV_CPACK_COMP_OVC_EXCLUDE_ALL)
     set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL)
-    unset(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL)
+    # we don't need wheels in the distribution packages
+    set(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     unset(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL)
     # nodejs
     set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL)
-    # tools
-    set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # scripts
     unset(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL)
     unset(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL)
diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake
index 581c9144c1b907..0ec054da853e2c 100644
--- a/cmake/developer_package/packaging/common-libraries.cmake
+++ b/cmake/developer_package/packaging/common-libraries.cmake
@@ -11,7 +11,6 @@ include(GNUInstallDirs)
 #
 macro(ov_common_libraries_cpack_set_dirs)
     # override default locations for common libraries
-    set(OV_CPACK_TOOLSDIR ${CMAKE_INSTALL_BINDIR}) # only C++ tools are here
     set(OV_CPACK_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR})
     set(OV_CPACK_LIBRARYDIR ${CMAKE_INSTALL_LIBDIR})
     if(WIN32)
@@ -42,7 +41,7 @@ macro(ov_common_libraries_cpack_set_dirs)
     unset(OV_CPACK_SHAREDIR)

     # skipped during common libraries packaging
-    set(OV_CPACK_WHEELSDIR "tools")
+    set(OV_CPACK_WHEELSDIR "wheels")
 endmacro()

 ov_common_libraries_cpack_set_dirs()
@@ -98,14 +97,12 @@ macro(ov_define_component_include_rules)
     # we don't pack artifacts of setup.py install, because it's called explicitly in conda / brew
     # or not used at all like in cases with conan / vcpkg
     set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL})
-    # we don't need wheels in package, it's used installed only in open source distribution
+    # we don't need wheels in the distribution packages
     set(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # we don't need requirements.txt in package, because dependencies are installed by package managers like conda
     set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # nodejs
     set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL)
-    # tools
-    set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # scripts
     set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL)
diff --git a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake
index f133428d66ec74..2b95fcfde5c145 100644
--- a/cmake/developer_package/packaging/debian/debian.cmake
+++ b/cmake/developer_package/packaging/debian/debian.cmake
@@ -11,7 +11,6 @@ include(GNUInstallDirs)
 #
 macro(ov_debian_cpack_set_dirs)
     # override default locations for Debian
-    set(OV_CPACK_TOOLSDIR ${CMAKE_INSTALL_BINDIR}) # only C++ tools are here
     set(OV_CPACK_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR})
     set(OV_CPACK_RUNTIMEDIR ${CMAKE_INSTALL_LIBDIR})
     if(CMAKE_CROSSCOMPILING)
@@ -42,7 +41,7 @@ macro(ov_debian_cpack_set_dirs)
     unset(OV_CPACK_SHAREDIR)

     # skipped during debian packaging
-    set(OV_CPACK_WHEELSDIR "tools")
+    set(OV_CPACK_WHEELSDIR "wheels")
 endmacro()

 ov_debian_cpack_set_dirs()
@@ -111,8 +110,6 @@ macro(ov_define_component_include_rules)
     set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # nodejs
     set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL)
-    # tools
-    set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # scripts
     set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL)
diff --git a/cmake/developer_package/packaging/npm.cmake b/cmake/developer_package/packaging/npm.cmake
index 2a2509cdcae65a..a1ad45bce356ee 100644
--- a/cmake/developer_package/packaging/npm.cmake
+++ b/cmake/developer_package/packaging/npm.cmake
@@ -19,7 +19,6 @@ macro(ov_npm_cpack_set_dirs)
     set(OV_CPACK_LICENSESDIR licenses)
     set(OV_CPACK_SAMPLESDIR .)
     set(OV_CPACK_WHEELSDIR .)
-    set(OV_CPACK_TOOLSDIR .)
     set(OV_CPACK_DEVREQDIR .)
     set(OV_CPACK_PYTHONDIR .)
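Note: across this patch the Python wheels move from the shared tools/ directory to a dedicated wheels/ directory (OV_CPACK_WHEELSDIR), and the workflow jobs above install them relative to INSTALL_WHEELS_DIR. A minimal Python sketch of the equivalent locate-and-install step the workflows perform with find / Get-ChildItem plus pip; the install root path here is hypothetical, the real jobs use ${{ github.workspace }}/install/wheels:

    import glob
    import subprocess
    import sys

    wheels_dir = "install/wheels"  # hypothetical root for illustration

    # Find the core OpenVINO wheel in the new layout (previously under <install>/tools).
    core_wheels = glob.glob(f"{wheels_dir}/openvino-*.whl")
    if not core_wheels:
        raise FileNotFoundError("no openvino wheel found; was the python_wheels component installed?")

    # Same effect as the workflow step `python3 -m pip install ./openvino-*.whl`.
    subprocess.check_call([sys.executable, "-m", "pip", "install", core_wheels[0]])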
@@ -78,8 +77,6 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # nodejs unset(OV_CPACK_COMP_NPM_EXCLUDE_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL) diff --git a/cmake/developer_package/packaging/nsis.cmake b/cmake/developer_package/packaging/nsis.cmake index d7d8ed152c49d9..9b2242fe1f5302 100644 --- a/cmake/developer_package/packaging/nsis.cmake +++ b/cmake/developer_package/packaging/nsis.cmake @@ -58,8 +58,7 @@ macro(ov_archive_cpack_set_dirs) set(OV_CPACK_DOCDIR docs) set(OV_CPACK_LICENSESDIR licenses) set(OV_CPACK_SAMPLESDIR samples) - set(OV_CPACK_WHEELSDIR tools) - set(OV_CPACK_TOOLSDIR tools) + set(OV_CPACK_WHEELSDIR wheels) set(OV_CPACK_DEVREQDIR tools) set(OV_CPACK_PYTHONDIR python) @@ -133,8 +132,6 @@ macro(ov_define_component_include_rules) unset(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL) # nodejs set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL) - # tools - unset(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL) # scripts unset(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL) unset(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL) diff --git a/cmake/developer_package/packaging/packaging.cmake b/cmake/developer_package/packaging/packaging.cmake index d62a8ca7fe2084..478fcdf0bd2c86 100644 --- a/cmake/developer_package/packaging/packaging.cmake +++ b/cmake/developer_package/packaging/packaging.cmake @@ -173,8 +173,6 @@ macro(ov_define_component_names) set(OV_CPACK_COMP_OPENVINO_REQ_FILES "openvino_req_files") # nodejs set(OV_CPACK_COMP_NPM "ov_node_addon") - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES "openvino_dev_req_files") # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES "install_dependencies") set(OV_CPACK_COMP_SETUPVARS "setupvars") diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index b7c482555bd131..45d9b0c0ca2121 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -11,7 +11,6 @@ include(GNUInstallDirs) # macro(ov_rpm_cpack_set_dirs) # override default locations for RPM - set(OV_CPACK_TOOLSDIR ${CMAKE_INSTALL_BINDIR}) # only C++ tools are here set(OV_CPACK_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}) set(OV_CPACK_LIBRARYDIR ${CMAKE_INSTALL_LIBDIR}) set(OV_CPACK_RUNTIMEDIR ${CMAKE_INSTALL_LIBDIR}) @@ -33,7 +32,7 @@ macro(ov_rpm_cpack_set_dirs) unset(OV_CPACK_SHAREDIR) # skipped during rpm packaging - set(OV_CPACK_WHEELSDIR "tools") + set(OV_CPACK_WHEELSDIR "wheels") endmacro() ov_rpm_cpack_set_dirs() @@ -102,8 +101,6 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # nodejs set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 797c95ef7d91c5..eedfe078cbd552 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -6,6 +6,7 @@ add_subdirectory(snippets) set(ENABLE_CPP_API OFF CACHE BOOL "Build with C/C++ API.") set(ENABLE_PYTHON_API OFF CACHE BOOL "Build with Python API.") +set(ENABLE_GENAI_API OFF CACHE BOOL "Build with GenAI API.") set(ENABLE_NOTEBOOKS OFF CACHE BOOL "Build 
with openvino notebooks.") set(ENABLE_OMZ OFF CACHE BOOL "Build with open_model_zoo.") set(ENABLE_OVMS OFF CACHE BOOL "Build with ovms.") @@ -61,11 +62,12 @@ function(build_docs) list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OpenVINO C/C++ API reference") endif() - if(${ENABLE_PYTHON_API}) + if(${ENABLE_PYTHON_API} OR ${ENABLE_GENAI_API}) list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "STARTED preprocessing OpenVINO Python API") list(APPEND commands COMMAND ${Python3_EXECUTABLE} ${OV_INSTALLATION_SCRIPT} --ov_dir=${SPHINX_SETUP_DIR} - --python=${Python3_EXECUTABLE}) + --python=${Python3_EXECUTABLE} + --enable_genai=${ENABLE_GENAI_API}) list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OpenVINO Python API") endif() @@ -83,7 +85,7 @@ function(build_docs) list(APPEND commands COMMAND ${Python3_EXECUTABLE} ${FILE_HELPER_SCRIPT} --filetype=md --input_dir=${OVMS_DOCS_DIR} - --output_dir=${SPHINX_SOURCE_DIR} + --output_dir=${SPHINX_SOURCE_DIR}/openvino-workflow/model-server --exclude_dir=${SPHINX_SOURCE_DIR}) list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OVMS") endif() diff --git a/docs/articles_en/openvino-workflow.rst b/docs/articles_en/openvino-workflow.rst index 90101fd1fb35e8..0dda91f91fb552 100644 --- a/docs/articles_en/openvino-workflow.rst +++ b/docs/articles_en/openvino-workflow.rst @@ -14,7 +14,7 @@ OpenVINO Workflow openvino-workflow/model-optimization Running Inference Deployment on a Local System - Deployment on a Model Server + Deployment on a Model Server openvino-workflow/torch-compile diff --git a/docs/scripts/install_appropriate_openvino_version.py b/docs/scripts/install_appropriate_openvino_version.py index d9cbb6a9ddb1f9..262fd13d771ca2 100644 --- a/docs/scripts/install_appropriate_openvino_version.py +++ b/docs/scripts/install_appropriate_openvino_version.py @@ -2,64 +2,64 @@ import argparse import subprocess import requests -import pkg_resources from packaging import version from pathlib import Path def determine_openvino_version(file_path): pattern = r"version_name\s*=\s*['\"]([^'\"]+)['\"]" - with open(file_path, 'r') as file: content = file.read() - match = re.search(pattern, content) - - if match: - return match.group(1) - else: - return None + return match.group(1) if match else None -def get_latest_version(major_version): - url = f"https://pypi.org/pypi/openvino/json" +def get_latest_version(package, major_version): + url = f"https://pypi.org/pypi/{package}/json" response = requests.get(url) - if response.status_code == 200: data = response.json() versions = data['releases'].keys() - - # Filter versions by the major version prefix matching_versions = [v for v in versions if v.startswith(major_version)] - - # Sort the matching versions and return the latest one if matching_versions: matching_versions.sort(key=version.parse) return matching_versions[-1] - return None +def install_package(python_executable, package): + subprocess.check_call([f'{python_executable}', '-m', 'pip', 'install', '-U', package, '--no-cache-dir']) + + def main(): parser = argparse.ArgumentParser() parser.add_argument('--ov_dir', type=Path, help='OpenVINO docs directory') parser.add_argument('--python', type=Path, help='Python executable') + parser.add_argument('--enable_genai', type=str, choices=['ON', 'OFF'], default='OFF', help='Enable GenAI API installation') args = parser.parse_args() - ov_dir = args.ov_dir - 
python_executable = args.python - version_name = determine_openvino_version(ov_dir.joinpath("conf.py")) - - if version_name is None: - ov_version = "openvino" - elif version_name == "nightly": - ov_version = "openvino-nightly" + + version_name = determine_openvino_version(args.ov_dir.joinpath("conf.py")) + + if version_name == "nightly": + install_package(args.python, "openvino-nightly") + print("OpenVINO nightly version installed. OpenVINO GenAI nightly version is not available.") + elif version_name is None or version_name == "latest": + install_package(args.python, "openvino") + if args.enable_genai == 'ON': + install_package(args.python, "openvino-genai") else: - latest_version = get_latest_version(version_name) - if latest_version: - ov_version = f"openvino=={latest_version}" + ov_version = get_latest_version("openvino", version_name) + if ov_version: + install_package(args.python, f"openvino=={ov_version}") else: - ov_version = f"openvino=={version_name}" - subprocess.check_call([f'{python_executable}', '-m', 'pip', 'install', '-U', ov_version, '--no-cache-dir']) + print(f"No matching OpenVINO version found for {version_name}") + + if args.enable_genai == 'ON': + ov_genai_version = get_latest_version("openvino-genai", version_name) + if ov_genai_version: + install_package(args.python, f"openvino-genai=={ov_genai_version}") + else: + print(f"No matching OpenVINO GenAI version found for {version_name}") if __name__ == "__main__": diff --git a/docs/scripts/tests/suppress_warnings.txt b/docs/scripts/tests/suppress_warnings.txt index b9942f28dc02ae..993a290c6d7ea3 100644 --- a/docs/scripts/tests/suppress_warnings.txt +++ b/docs/scripts/tests/suppress_warnings.txt @@ -61,3 +61,4 @@ toctree contains reference to nonexisting document pygments lexer name non-consecutive header level increase document headings start at +inline strong start-string without end-string diff --git a/docs/sphinx_setup/api/api_reference.rst b/docs/sphinx_setup/api/api_reference.rst index acf816364ca0fc..9f60573707fbde 100644 --- a/docs/sphinx_setup/api/api_reference.rst +++ b/docs/sphinx_setup/api/api_reference.rst @@ -15,6 +15,7 @@ API Reference OpenVINO Runtime C++ API OpenVINO Runtime C API OpenVINO Node.js API + GenAI Python API diff --git a/docs/sphinx_setup/api/genai_api/api.rst b/docs/sphinx_setup/api/genai_api/api.rst new file mode 100644 index 00000000000000..fc5151d7781ae7 --- /dev/null +++ b/docs/sphinx_setup/api/genai_api/api.rst @@ -0,0 +1,12 @@ +OpenVINO GenAI API +=================== + +.. meta:: + :description: Explore OpenVINO GenAI Python API and implementation of its features in Intel® Distribution of OpenVINO™ GenAI. + + +.. 
autosummary:: + :toctree: _autosummary + :template: custom-module-template.rst + + openvino_genai \ No newline at end of file diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index 351a6d6c5ea8b9..148309ccbafe96 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -34,11 +34,21 @@ 'breathe' ] +autodoc_mock_imports = [] + try: import openvino except ImportError: - autodoc_mock_imports = ["openvino"] + autodoc_mock_imports.append("openvino") + autodoc_mock_imports.append("openvino_genai") # Mock openvino_genai too, as it depends on openvino + +if "openvino" not in autodoc_mock_imports: + try: + import openvino_genai + except ImportError: + autodoc_mock_imports.append("openvino_genai") + breathe_projects = { "openvino": "../xml/" } diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 03b91ba545dd51..c448571f1ac17a 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -24,7 +24,8 @@ def __init__(self, data) -> None: class TorchFXPythonDecoder (Decoder): - def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]): + def __init__(self, pt_module, fx_gm=None, nodes=None, + mark_node_callback=None, input_shapes=[], input_types=[], dynamic_shapes=False): Decoder.__init__(self) self.mark_node_callback = mark_node_callback # We store every decoder created by this decoder so that all them are not deleted until the first decoder is deleted @@ -67,7 +68,7 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i if shape is not None: new_shape = [] for dim in range(0, len(shape)): - if (type(shape[dim]).__name__ == "SymInt"): + if (dynamic_shapes or type(shape[dim]).__name__ == "SymInt"): new_shape.append(-1) else: new_shape.append(shape[dim]) diff --git a/src/common/transformations/include/transformations/utils/gen_pattern.hpp b/src/common/transformations/include/transformations/utils/gen_pattern.hpp index 8efe2eb575d0bd..21309e339c959c 100644 --- a/src/common/transformations/include/transformations/utils/gen_pattern.hpp +++ b/src/common/transformations/include/transformations/utils/gen_pattern.hpp @@ -136,6 +136,14 @@ struct values_info { if (all_type_pshape.empty()) return true; auto index = value.get_index(); + if (index >= all_type_pshape.size()) { + _VERBOSE_LOG("* mismatched vtype : value from output port ", + index, + ", but only ", + all_type_pshape.size(), + " ports are expected!"); + return false; + } auto& item = all_type_pshape[index]; if (!item.first.compatible(value.get_element_type()) || !item.second.compatible(value.get_partial_shape())) { _VERBOSE_LOG("* mismatched vtype between value & pattern : ", diff --git a/src/common/util/include/openvino/util/mmap_object.hpp b/src/common/util/include/openvino/util/mmap_object.hpp index 5cfc2adac1ec0f..364e1eed4ca712 100644 --- a/src/common/util/include/openvino/util/mmap_object.hpp +++ b/src/common/util/include/openvino/util/mmap_object.hpp @@ -9,6 +9,7 @@ #pragma once +#include #include #include @@ -50,4 +51,17 @@ std::shared_ptr load_mmap_object(const std::wstring& path); #endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +class MmapStream final : public std::ifstream { +public: + MmapStream(const std::string& path) : std::ifstream(path, std::ios_base::binary) { + m_memory = ov::load_mmap_object(path); + } + +#ifdef 
OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + MmapStream(const std::wstring& path); +#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + + std::shared_ptr<ov::MappedMemory> m_memory; +}; + } // namespace ov diff --git a/src/common/util/src/os/win/win_mmap_object.cpp b/src/common/util/src/os/win/win_mmap_object.cpp index 114ef6cd9cd6a7..0b14d7ac774700 100644 --- a/src/common/util/src/os/win/win_mmap_object.cpp +++ b/src/common/util/src/os/win/win_mmap_object.cpp @@ -142,6 +142,10 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path) { return holder; } +MmapStream::MmapStream(const std::wstring& path) : std::ifstream(path.data(), std::ios_base::binary) { + m_memory = ov::load_mmap_object(path); +} + #endif } // namespace ov diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index c4420b9e3fa822..7f1e2e9ba7601f 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -8,7 +8,7 @@ namespace ov { -/// \brief SharedBuffer class to store pointer to pre-acclocated buffer. +/// \brief SharedBuffer class to store pointer to pre-allocated buffer. template <typename T> class SharedBuffer : public ov::AlignedBuffer { public: diff --git a/src/core/shape_inference/include/search_sorted_shape_inference.hpp b/src/core/shape_inference/include/search_sorted_shape_inference.hpp index da417f54121ee4..7ea0598cffbc87 100644 --- a/src/core/shape_inference/include/search_sorted_shape_inference.hpp +++ b/src/core/shape_inference/include/search_sorted_shape_inference.hpp @@ -16,18 +16,24 @@ std::vector<TRShape> shape_infer(const SearchSorted* op, const std::vector<TShape>& input_shapes) { op->validate(); const auto& sorted_shape = input_shapes[0]; const auto& values_shape = input_shapes[1]; + auto output_shape = values_shape; - TShape::merge_into(output_shape, sorted_shape); - - if (output_shape.rank().is_static()) { - auto last_it = output_shape.end() - 1; - if (values_shape.rank().is_static()) { - *last_it = *(input_shapes[1].end() - 1); - } else { - *last_it = Dimension::dynamic(); - } + + // 1. If we know that the sorted sequence is 1D, then the output shape can be anything. + if (sorted_shape.rank().is_static() && sorted_shape.rank().get_length() == 1) { + return {std::move(output_shape)}; + } + + // 2. ND tensor case or rank not known. 
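(Editorial aside before the ND branch below.) The intent of the rewritten `shape_infer` is: the output always takes the values shape; a 1D sorted sequence adds no constraint, while an ND sorted sequence must agree with the values on every axis except the last. A standalone sketch of that rule under simplifying assumptions (static ranks, `-1` standing in for a dynamic `Dimension`, hypothetical helper names — not the real `PartialShape` API):

```cpp
// Sketch of the SearchSorted output-shape rule, with -1 meaning "dynamic".
#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

using Shape = std::vector<int64_t>;

static bool merge_dim(int64_t& dst, int64_t src) {
    if (src == -1) return true;               // src constrains nothing
    if (dst == -1) { dst = src; return true; }
    return dst == src;                        // static dims must match exactly
}

// Returns std::nullopt when the sorted/values shapes are incompatible.
static std::optional<Shape> search_sorted_output(const Shape& sorted, const Shape& values) {
    Shape out = values;                       // output always has the values shape
    if (sorted.size() == 1) return out;       // 1D sorted sequence constrains nothing
    assert(sorted.size() == out.size());      // real code also handles dynamic rank
    for (size_t i = 0; i + 1 < sorted.size(); ++i)  // skip the last (searched) axis
        if (!merge_dim(out[i], sorted[i]))
            return std::nullopt;
    return out;
}

int main() {
    auto ok = search_sorted_output({2, 3, 50}, {2, -1, 6});
    assert(ok && (*ok == Shape{2, 3, 6}));               // batch dims merged, last dim from values
    assert(!search_sorted_output({2, 3, 50}, {2, 4, 6}));  // mismatched batch dim -> error
}
```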
+ auto sorted_shape_last_dynamic = sorted_shape; + if (sorted_shape.rank().is_static()) { + sorted_shape_last_dynamic[sorted_shape.rank().get_length() - 1] = Dimension::dynamic(); } + const bool sorted_values_merge_success = TShape::merge_into(output_shape, sorted_shape_last_dynamic); + + NODE_VALIDATION_CHECK(op, sorted_values_merge_success, "Shapes of sorted sequence and values are not compatible."); + return {std::move(output_shape)}; } } // namespace v15 diff --git a/src/core/src/op/search_sorted.cpp b/src/core/src/op/search_sorted.cpp index df179d925d054a..d3f26a674eef91 100644 --- a/src/core/src/op/search_sorted.cpp +++ b/src/core/src/op/search_sorted.cpp @@ -21,7 +21,7 @@ SearchSorted::SearchSorted(const Output& sorted_sequence, const Output(element::f32, Shape{1, 3, 6}); auto values = make_shared(element::i32, Shape{1, 3, 6}); diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index c1f80f102e5a87..db979a35d932af 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -37,19 +37,17 @@ size_t get_ir_version(const pugi::xml_document& doc) { return 0; } +constexpr size_t HEADER_SIZE_LIM = 512lu; + /** * @brief Extracts IR version from model stream * @param model Model's stream * @return IR version, 0 if model does represent IR */ -size_t get_ir_version(std::istream& model) { +size_t get_ir_version(const char* model, size_t model_size) { // IR version is a value of root tag attribuite thought not need to parse the whole stream. - std::array header{}; - model.seekg(0, model.beg); - model.read(header.data(), header.size()); - model.clear(); - model.seekg(0, model.beg); + size_t header_size = model_size > HEADER_SIZE_LIM ? HEADER_SIZE_LIM : model_size; pugi::xml_document doc; // For dominant number of IRs `load_buffer' in this case returns parsing-error as 512 is not enough for the whole @@ -57,15 +55,32 @@ size_t get_ir_version(std::istream& model) { // tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that // was successfully parsed." root node is processed because it should be enough to read model version. However if IR // is small enough to fit 512 bytes ok-status is returned. Thus ignoring returned value. - std::ignore = - doc.load_buffer(header.data(), header.size(), pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8); + std::ignore = doc.load_buffer(model, header_size, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8); auto ir_version = get_ir_version(doc); // In case attribute name is very long and placed before version attribute of root node or there is long comment // node before root node then version attribute of root node is not accesible within first 512 bytes, so read the // whole stream and try to obtain version value. 
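(Editorial aside.) The fallback above leans on the pugixml behavior quoted in the comment: even when `load_buffer` fails partway, the part of the tree already built survives, so the root tag's `version` attribute is usually reachable from just the first bytes of an IR. A small demonstration, assuming pugixml is available; the buffer is a made-up, deliberately truncated IR header, and exact recovery depends on where the truncation falls:

```cpp
// Reads the IR "version" attribute from a truncated buffer: pugixml keeps
// the partially built tree even when load_buffer reports a parse error.
#include <cstring>
#include <iostream>
#include <pugixml.hpp>

int main() {
    // Deliberately truncated IR header: not well-formed XML on its own.
    const char* header = "<net name=\"m\" version=\"11\"><layers><layer id=\"0\" ";
    pugi::xml_document doc;
    const auto res = doc.load_buffer(header,
                                     std::strlen(header),
                                     pugi::parse_default | pugi::parse_fragment,
                                     pugi::encoding_utf8);
    std::cout << "parse ok: " << (res.status == pugi::status_ok) << "\n";  // 0 (error)
    // ...yet the root element and its attributes were parsed:
    std::cout << "version: " << doc.document_element().attribute("version").as_ullong() << "\n";  // 11
    return 0;
}
```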
- if (ir_version == 0) { + if (ir_version == 0lu && header_size < model_size && + doc.load_buffer(model, model_size, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8)) { + ir_version = get_ir_version(doc); + } + + return ir_version; +} + +size_t get_ir_version(std::istream& model) { + char header[HEADER_SIZE_LIM]; + + model.seekg(0, model.beg); + model.read(header, HEADER_SIZE_LIM); + model.clear(); + model.seekg(0, model.beg); + + auto ir_version = get_ir_version(header, HEADER_SIZE_LIM); + if (ir_version == 0lu) { + pugi::xml_document doc; if (doc.load(model)) ir_version = get_ir_version(doc); @@ -75,6 +90,7 @@ size_t get_ir_version(std::istream& model) { return ir_version; } + } // namespace bool FrontEnd::supported_impl(const std::vector& variants) const { @@ -82,6 +98,7 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { size_t extra_variants_num = variants.size() > 0 && variants[variants.size() - 1].is() ? 1 : 0; std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; + std::shared_ptr model_buffer = nullptr; if (variants.empty() || variants.size() > 3 + extra_variants_num) { return false; @@ -102,6 +119,8 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { provided_model_stream = model_variant.as(); } else if (model_variant.is()) { provided_model_stream = model_variant.as(); + } else if (model_variant.is>()) { + model_buffer = model_variant.as>(); } if (provided_model_stream && local_model_stream.is_open()) { @@ -114,6 +133,8 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { } else if (local_model_stream.is_open()) { version = get_ir_version(local_model_stream); local_model_stream.close(); + } else if (model_buffer) { + version = get_ir_version(model_buffer->get_ptr(), model_buffer->size()); } else { return false; } @@ -135,6 +156,7 @@ void FrontEnd::add_extension(const ov::Extension::Ptr& ext) { InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const { std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; + std::shared_ptr model_buf; std::shared_ptr weights; auto create_extensions_map = [&]() -> std::unordered_map { @@ -153,6 +175,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const auto input_model = std::make_shared(local_model_stream, weights, create_extensions_map()); local_model_stream.close(); return input_model; + } else if (model_buf) { + return std::make_shared(model_buf, weights, create_extensions_map()); } return nullptr; }; @@ -184,6 +208,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const provided_model_stream = model_variant.as(); } else if (model_variant.is()) { provided_model_stream = model_variant.as(); + } else if (model_variant.is>()) { + model_buf = model_variant.as>(); } // Check weights and extensions diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 968cbac7a51430..6c59617c69a48d 100644 --- a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -207,28 +207,47 @@ class InputModel::InputModelIRImpl { pugi::xml_document m_xml_doc; public: - InputModelIRImpl(std::istream& stream, + InputModelIRImpl(std::istream& model, const std::shared_ptr& weights, const std::unordered_map& extensions) : m_weights(weights), m_extensions(extensions) { - pugi::xml_parse_result res = m_xml_doc.load(stream); - if (res.status != pugi::status_ok) { - OPENVINO_THROW(res.description(), " at offset ", res.offset); - } + 
pugi::xml_parse_result res = m_xml_doc.load(model); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + init_opset(); + } + + InputModelIRImpl(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::unordered_map& extensions) + : m_weights(weights), + m_extensions(extensions) { + auto res = m_xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + init_opset(); + } + + std::shared_ptr convert(); + +private: + void init_opset() { m_root = m_xml_doc.document_element(); for (const auto& it : ov::get_available_opsets()) { m_opsets[it.first] = it.second(); } } - - std::shared_ptr convert(); }; -InputModel::InputModel(std::istream& stream, +InputModel::InputModel(std::istream& model, + const std::shared_ptr& weights, + const std::unordered_map& extensions) { + _impl = std::make_shared(model, weights, extensions); +} + +InputModel::InputModel(const std::shared_ptr& model, const std::shared_ptr& weights, const std::unordered_map& extensions) { - _impl = std::make_shared(stream, weights, extensions); + _impl = std::make_shared(model, weights, extensions); } std::shared_ptr InputModel::convert() { diff --git a/src/frontends/ir/src/input_model.hpp b/src/frontends/ir/src/input_model.hpp index a1878fe88d6714..331092749bbeb9 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -24,6 +24,10 @@ class InputModel : public ov::frontend::InputModel { const std::shared_ptr& weights, const std::unordered_map& extensions); + InputModel(const std::shared_ptr& model_buf, + const std::shared_ptr& weights, + const std::unordered_map& extensions); + std::shared_ptr convert(); }; diff --git a/src/inference/dev_api/openvino/runtime/icore.hpp b/src/inference/dev_api/openvino/runtime/icore.hpp index 378221b0dc7dbe..659b9c5c0f5788 100644 --- a/src/inference/dev_api/openvino/runtime/icore.hpp +++ b/src/inference/dev_api/openvino/runtime/icore.hpp @@ -11,6 +11,7 @@ #include +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/so_ptr.hpp" @@ -45,6 +46,15 @@ class OPENVINO_RUNTIME_API ICore { const ov::Tensor& weights, bool frontend_mode = false) const = 0; + /** + * @brief Reads IR xml and bin from buffer + * @param model shared pointer to aligned buffer with IR + * @param weights shared pointer to aligned buffer with weights + * @return shared pointer to ov::Model + */ + virtual std::shared_ptr read_model(const std::shared_ptr& model, + const std::shared_ptr& weights) const = 0; + /** * @brief Reads IR xml and bin files * @param model_path path to IR file diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index eb4bc9bee916a7..60d6b66cfda897 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -29,6 +29,13 @@ static constexpr Property, PropertyMutability::RO> sup */ static constexpr Property, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"}; +/** + * @brief Read-only property to get a std::vector of properties + * which should affect the loading time from cache + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property caching_with_mmap{"CACHING_WITH_MMAP"}; + /** 
* @brief Allow to create exclusive_async_requests with one executor * @ingroup ov_dev_api_plugin_api diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index b14fe2abe18a7e..9e9ebd3ddcbc2b 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -15,6 +15,7 @@ #include #include "openvino/util/file_util.hpp" +#include "openvino/util/mmap_object.hpp" namespace ov { @@ -78,7 +79,7 @@ class ICacheManager { * @param id Id of cache (hash of the model) * @param reader Lambda function to be called when input stream is created */ - virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0; + virtual void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) = 0; /** * @brief Callback when OpenVINO intends to remove cache entry @@ -129,13 +130,18 @@ class FileStorageCacheManager final : public ICacheManager { writer(stream); } - void read_cache_entry(const std::string& id, StreamReader reader) override { + void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) override { // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C". ScopedLocale plocal_C(LC_ALL, "C"); - auto blobFileName = getBlobFile(id); - if (ov::util::file_exists(blobFileName)) { - std::ifstream stream(blobFileName, std::ios_base::binary); - reader(stream); + auto blob_file_name = getBlobFile(id); + if (ov::util::file_exists(blob_file_name)) { + if (mmap) { + MmapStream stream(blob_file_name); + reader(stream); + } else { + std::ifstream stream(blob_file_name, std::ios_base::binary); + reader(stream); + } } } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 237c246ab38bdc..9f55dc53ccd24f 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -24,6 +24,7 @@ #include "openvino/runtime/itensor.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/remote_context.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" @@ -1396,6 +1397,13 @@ ov::SoPtr ov::CoreImpl::compile_model_and_cache(ov::Plugin& return compiled_model; } +static bool does_plugin_support_model_caching_with_mmap(const ov::Plugin& plugin) { + bool supported = plugin.supports_model_caching(); + supported &= + ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap); + return supported; +} + ov::SoPtr ov::CoreImpl::load_model_from_cache( const CacheContent& cacheContent, ov::Plugin& plugin, @@ -1406,43 +1414,48 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( struct HeaderException {}; OPENVINO_ASSERT(cacheContent.cacheManager != nullptr); + try { - cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) { - OV_ITT_SCOPE(FIRST_INFERENCE, - ov::itt::domains::LoadTime, - "Core::load_model_from_cache::ReadStreamAndImport"); - try { - ov::CompiledBlobHeader header; - networkStream >> header; - if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { - // Original file is changed, don't use cache - OPENVINO_THROW("Original model file is changed"); - } - if (util::contains(plugin.get_property(ov::internal::supported_properties), - ov::internal::compiled_model_runtime_properties_supported.name())) { - ov::AnyMap 
compiled_model_runtime_properties = { - {ov::internal::compiled_model_runtime_properties.name(), - std::string(header.get_runtime_info())}}; - auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), - compiled_model_runtime_properties); - if (!res.as()) { - OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!"); + cacheContent.cacheManager->read_cache_entry( + cacheContent.blobId, + [&](std::istream& networkStream) { + OV_ITT_SCOPE(FIRST_INFERENCE, + ov::itt::domains::LoadTime, + "Core::load_model_from_cache::ReadStreamAndImport"); + try { + ov::CompiledBlobHeader header; + networkStream >> header; + if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { + // Original file is changed, don't use cache + OPENVINO_THROW("Original model file is changed"); } - } else { - if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) { - // Build number mismatch, don't use this cache - OPENVINO_THROW("Version does not match"); + if (util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::compiled_model_runtime_properties_supported.name())) { + ov::AnyMap compiled_model_runtime_properties = { + {ov::internal::compiled_model_runtime_properties.name(), + std::string(header.get_runtime_info())}}; + auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), + compiled_model_runtime_properties); + if (!res.as()) { + OPENVINO_THROW( + "Original model runtime properties have been changed, not supported anymore!"); + } + } else { + if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) { + // Build number mismatch, don't use this cache + OPENVINO_THROW("Version does not match"); + } } + } catch (...) { + throw HeaderException(); } - } catch (...) { - throw HeaderException(); - } - ov::AnyMap update_config = config; - update_config[ov::loaded_from_cache.name()] = true; - compiled_model = context ? plugin.import_model(networkStream, context, update_config) - : plugin.import_model(networkStream, update_config); - }); + ov::AnyMap update_config = config; + update_config[ov::loaded_from_cache.name()] = true; + compiled_model = context ? 
plugin.import_model(networkStream, context, update_config) + : plugin.import_model(networkStream, update_config); + }, + does_plugin_support_model_caching_with_mmap(plugin)); } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheContent.cacheManager->remove_cache_entry(cacheContent.blobId); @@ -1603,6 +1616,12 @@ std::shared_ptr ov::CoreImpl::read_model(const std::string& model, return ov::util::read_model(model, weights, extensions, frontendMode); } +std::shared_ptr ov::CoreImpl::read_model(const std::shared_ptr& model, + const std::shared_ptr& weights) const { + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::read_model from memory"); + return ov::util::read_model(model, weights, extensions); +} + std::map ov::CoreImpl::get_versions(const std::string& deviceName) const { std::map versions; std::vector deviceNames; diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 40f2a15bb725e0..79b1b96d57ac30 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -253,6 +253,9 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this read_model(const std::shared_ptr& model, + const std::shared_ptr& weights) const override; + std::shared_ptr read_model(const std::string& model_path, const std::string& bin_path) const override; ov::SoPtr compile_model(const std::shared_ptr& model, diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 288389c46db859..40207bac9087fa 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -73,10 +73,10 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, cons OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, properties), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& networkModel, +ov::SoPtr ov::Plugin::import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(networkModel, context, config), m_so}); + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 9eeed484840fff..14a5adebbab3a4 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -55,7 +55,7 @@ class Plugin { SoPtr import_model(std::istream& model, const ov::AnyMap& properties) const; - SoPtr import_model(std::istream& networkModel, + SoPtr import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& config) const; diff --git a/src/inference/src/model_reader.cpp b/src/inference/src/model_reader.cpp index febd9b1174dda6..aaf620ea0f803a 100644 --- a/src/inference/src/model_reader.cpp +++ b/src/inference/src/model_reader.cpp @@ -195,5 +195,33 @@ std::shared_ptr read_model(const std::string& model, OPENVINO_THROW("Unable to read the model. 
Please check if the model format is supported and model is correct."); } +std::shared_ptr read_model(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::vector& ov_exts) { + // Try to load with FrontEndManager + ov::frontend::FrontEndManager manager; + ov::frontend::FrontEnd::Ptr FE; + ov::frontend::InputModel::Ptr inputModel; + + ov::AnyVector params{model}; + if (weights) { + params.emplace_back(weights); + } + + FE = manager.load_by_model(params); + if (FE) { + FE->add_extension(ov_exts); + inputModel = FE->load(params); + } + if (inputModel) { + auto model = FE->convert(inputModel); + update_v10_model(model); + return model; + } + + OPENVINO_THROW( + "[ CORE ] Unable to read the model. Please check if the model format is supported and model is correct."); +} + } // namespace util } // namespace ov diff --git a/src/inference/src/model_reader.hpp b/src/inference/src/model_reader.hpp index 4617fa55b83e9b..433da2ee5d2107 100644 --- a/src/inference/src/model_reader.hpp +++ b/src/inference/src/model_reader.hpp @@ -9,6 +9,7 @@ #include "openvino/core/extension.hpp" #include "openvino/core/model.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { @@ -28,6 +29,17 @@ std::shared_ptr read_model(const std::string& modelPath, const std::vector& extensions, bool enable_mmap); +/** + * @brief Reads model + * @param model shared pointer to aligned buffer with IR. + * @param weights shared pointer to aligned buffer with weights. + * @param extensions vector with OpenVINO extensions + * @return Shared pointer to ov::Model + */ +std::shared_ptr read_model(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::vector& extensions); + /** * @brief Reads model * @param model Serialized model representation diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 72943b837f1f3b..bbee5d937be5d5 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -10,7 +10,6 @@ #include "memory_state.h" #include "openvino/core/type/element_type.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" -#include "serialize.h" #include "openvino/runtime/threading/executor_manager.hpp" #include "transformations/transformation_pipeline.h" #include "openvino/runtime/properties.hpp" @@ -19,6 +18,7 @@ #include "transformations/utils/utils.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" #include "openvino/runtime/threading/cpu_message.hpp" +#include "utils/serialize.hpp" #include "cpu/x64/cpu_isa_traits.hpp" #include diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 1aae0adf83bb47..2b9cdcc4ac1203 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -379,6 +379,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { } catch (ov::Exception&) { OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); } + } else if (key == ov::internal::caching_with_mmap.name()) { } else { OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 0ecb6fd63238d0..a4c99e2cc1fca7 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ 
b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -1928,6 +1928,60 @@ std::set> jit_sigmoid_emitter::get_supported_precisio return {{element::f32}}; } +/// SOFT_SIGN /// +jit_soft_sign_emitter::jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); +} + +jit_soft_sign_emitter::jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); +} + +size_t jit_soft_sign_emitter::get_inputs_count() const { return 1; } + +size_t jit_soft_sign_emitter::get_aux_vecs_count() const { return 2; } + +size_t jit_soft_sign_emitter::get_aux_gprs_count() const { return 1; } + +void jit_soft_sign_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OPENVINO_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_soft_sign_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (exec_prc_ != ov::element::f32) { + OPENVINO_THROW("unsupported precision: " + exec_prc_.to_string()); + } + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src(in_vec_idxs[0]); + const TReg dst(out_vec_idxs[0]); + const TReg aux1(aux_vec_idxs[0]); + const TReg aux2(aux_vec_idxs[1]); + + h->fabs(aux1.s, src.s); + h->ld1r(aux2.s, table_val2("one")); + h->fadd(aux1.s, aux1.s, aux2.s); + h->fdiv(dst.s, src.s, aux1.s); +} + +void jit_soft_sign_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); +} + +std::set> jit_soft_sign_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + /// SUBTRACT /// jit_subtract_emitter::jit_subtract_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index 8f3181355737eb..ccd82bc5b628e7 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -779,6 +779,35 @@ class jit_sigmoid_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_soft_sign_emitter : public jit_emitter { +public: + jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_soft_sign_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + void register_table_entries() override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + std::unique_ptr exp_emitter; + + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void 
emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + class jit_subtract_emitter : public jit_emitter { public: jit_subtract_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 0083f56a9ecde6..7848e479f175e4 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -47,6 +47,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseRelu, Algorithm::EltwiseSelect, Algorithm::EltwiseSigmoid, + Algorithm::EltwiseSoftSign, Algorithm::EltwiseSubtract, Algorithm::EltwiseSwish, Algorithm::EltwiseTanh); diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index c311c40714cb37..4ee5707e0a9e76 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -287,21 +287,20 @@ void Input::cloneBlobIfRequired() { return ptr; }; - auto isBlobAligned = [&, this] () { - const void *ptr = constOp->get_data_ptr(); + auto isBlobAligned = [&] () { bool blobAlignedOnSSE = true; #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) // Majority of arithmetic and data processing instructions in legacy SSE isa requires // the memory address in the operands must be aligned on 16-byte boundary. To ensure // safely reusing ngraph const blob memory, need to check address alignment. + const void *ptr = constOp->get_data_ptr(); blobAlignedOnSSE = mayiuse(cpu_isa_t::avx2) || ((reinterpret_cast(ptr) & 15) == 0); #endif - const bool blobAlignedWithPrec = prec.size() > 1 ? (reinterpret_cast(ptr) % prec.size()) == 0 : true; - return blobAlignedWithPrec && blobAlignedOnSSE; + return blobAlignedOnSSE; }; // The presence of subnormals is better to determined at IR read time. - auto hasSubnormals = [&, this] () { + auto hasSubnormals = [&] () { if (prec == ov::element::f32) { uint32_t const *u32data = constOp->get_data_ptr(); @@ -344,7 +343,7 @@ void Input::cloneBlobIfRequired() { return false; }; - auto blobKey = [&, this] () { + auto blobKey = [&] () { char ptr[32]; snprintf(ptr, sizeof ptr, "%p", constOp->get_data_ptr()); return getName() @@ -362,7 +361,6 @@ void Input::cloneBlobIfRequired() { // This is possible only in multistream case on multisocket machine. // TODO: don't clone blob for multisocket + multistream case if current stream is run on the numa node where original weights are stored. (!weightCache || context->getNumNumaNodes() == 1 || context->getCPUStreamExecutor()->get_streams_num() == 1); - memoryPtr = clone_is_not_needed ? std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()) : std::const_pointer_cast( weightCache ? 
*weightCache->findOrCreate(blobKey(), cloneBlob) : cloneBlob()); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 2402f613cfd67a..59a5f812499481 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -669,6 +669,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseRelu, ov::intel_cpu::aarch64::jit_relu_emitter), OV_CASE(Algorithm::EltwiseSelect, ov::intel_cpu::aarch64::jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, ov::intel_cpu::aarch64::jit_sigmoid_emitter), + OV_CASE(Algorithm::EltwiseSoftSign, ov::intel_cpu::aarch64::jit_soft_sign_emitter), OV_CASE(Algorithm::EltwiseSubtract, ov::intel_cpu::aarch64::jit_subtract_emitter), OV_CASE(Algorithm::EltwiseSwish, ov::intel_cpu::aarch64::jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, ov::intel_cpu::aarch64::jit_tanh_emitter)); @@ -845,6 +846,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, jit_sigmoid_emitter), + OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), OV_CASE(Algorithm::EltwiseSwish, jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, jit_tanh_emitter)); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 3bfe2ec01a360d..fa1810ff6044f9 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -4,6 +4,7 @@ #include "plugin.h" +#include "cpu_streams_calculation.hpp" #include "internal_properties.hpp" #include "itt.h" #include "openvino/runtime/intel_cpu/properties.hpp" @@ -11,13 +12,14 @@ #include "openvino/runtime/properties.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" #include "openvino/runtime/threading/executor_manager.hpp" -#include "openvino/util/codec_xor.hpp" -#include "serialize.h" #include "transformations/transformation_pipeline.h" #include "transformations/utils/utils.hpp" +#include "utils/codec_xor.hpp" #include "utils/denormals.hpp" #include "utils/precision_support.h" +#include "utils/serialize.hpp" #include "weights_cache.hpp" +#include "openvino/op/paged_attention.hpp" #if defined(__linux__) # include @@ -196,9 +198,9 @@ static Config::ModelType getModelType(const std::shared_ptr& model) if (op::util::has_op_with_type(model) || op::util::has_op_with_type(model)) return Config::ModelType::CNN; - - if (op::util::has_op_with_type(model) && - model->get_variables().size() > 0) + + if ((op::util::has_op_with_type(model) && model->get_variables().size() > 0) || + op::util::has_op_with_type(model)) return Config::ModelType::LLM; return Config::ModelType::Unknown; @@ -253,8 +255,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< calculate_streams(conf, cloned_model); if (!conf.cacheEncrypt || !conf.cacheDecrypt) { - conf.cacheEncrypt = ov::util::codec_xor; - conf.cacheDecrypt = ov::util::codec_xor; + conf.cacheEncrypt = codec_xor_str; + conf.cacheDecrypt = codec_xor_str; } transformations.PostLpt(); @@ -444,6 +446,9 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (ov::internal::supported_properties == name) { return 
decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, +#if !defined(OPENVINO_ARCH_ARM) + ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, +#endif ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), @@ -545,25 +550,24 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::shared_ptr Plugin::import_model(std::istream& networkModel, const ov::AnyMap& config) const { +std::shared_ptr Plugin::import_model(std::istream& model_stream, + const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - std::function decrypt; + CacheDecrypt decrypt{ codec_xor }; + bool decript_from_string = false; if (config.count(ov::cache_encryption_callbacks.name())) { auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); - decrypt = encryption_callbacks.decrypt; - } - - if (!decrypt) { - decrypt = ov::util::codec_xor; + decrypt.m_decrypt_str = encryption_callbacks.decrypt; + decript_from_string = true; } ModelDeserializer deserializer( - networkModel, - [this](const std::string& model, const ov::Tensor& weights) { - return get_core()->read_model(model, weights, true); + model_stream, + [this](const std::shared_ptr& model, const std::shared_ptr& weights) { + return get_core()->read_model(model, weights); }, - std::move(decrypt)); + decrypt, decript_from_string); std::shared_ptr model; deserializer >> model; diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index c2d24e98ee6f98..2548ba2c1cc8af 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -5,7 +5,6 @@ #pragma once #include "compiled_model.h" -#include "cpu_streams_calculation.hpp" #include "openvino/runtime/threading/cpu_message.hpp" namespace ov { @@ -22,7 +21,7 @@ class Plugin : public ov::IPlugin { const ov::AnyMap& properties, const ov::SoPtr& context) const override { OPENVINO_THROW_NOT_IMPLEMENTED( - "Not Implemented compile_model with RemoteContext is not supported by CPU plugin!"); + "compile_model with RemoteContext is not supported by CPU plugin!"); }; void set_property(const ov::AnyMap& properties) override; @@ -32,16 +31,16 @@ class Plugin : public ov::IPlugin { const ov::SoPtr& context, const ov::AnyMap& properties) const override { OPENVINO_THROW_NOT_IMPLEMENTED( - "Not Implemented import_model with RemoteContext is not supported by CPU plugin!"); + "import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, const ov::AnyMap& properties) const override; ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("Not Implemented create_context is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("create_context is not supported by CPU plugin!"); }; ov::SoPtr get_default_context(const ov::AnyMap& remote_properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("Not Implemented get_default_context is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("get_default_context is not supported by CPU plugin!"); }; std::shared_ptr m_msg_manager; diff --git 
a/src/plugins/intel_cpu/src/serialize.cpp b/src/plugins/intel_cpu/src/serialize.cpp deleted file mode 100644 index 16583cf1c73ef2..00000000000000 --- a/src/plugins/intel_cpu/src/serialize.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// -#include "serialize.h" - -#include - -#include "openvino/pass/serialize.hpp" -#include "openvino/util/codec_xor.hpp" -#include "transformations/utils/utils.hpp" - -namespace ov { -namespace intel_cpu { - -static void setInfo(pugi::xml_node& root, std::shared_ptr& model) { - pugi::xml_node outputs = root.child("outputs"); - auto nodes_it = outputs.children("out").begin(); - size_t size = model->outputs().size(); - for (size_t i = 0; i < size; ++nodes_it, i++) { - std::string name = nodes_it->attribute("name").value(); - if (name.empty()) - continue; - auto result = model->output(i).get_node_shared_ptr(); - ov::descriptor::set_ov_tensor_legacy_name(result->input_value(0).get_tensor(), name); - } -} - -ModelSerializer::ModelSerializer(std::ostream& ostream, cache_encrypt encrypt_fn) - : _ostream(ostream), _cache_encrypt(std::move(encrypt_fn)) {} - -void ModelSerializer::operator<<(const std::shared_ptr& model) { - auto serializeInfo = [&](std::ostream& stream) { - const std::string name = "cnndata"; - pugi::xml_document xml_doc; - pugi::xml_node root = xml_doc.append_child(name.c_str()); - pugi::xml_node outputs = root.append_child("outputs"); - for (const auto& out : model->get_results()) { - auto out_node = outputs.append_child("out"); - const std::string name = ov::descriptor::get_ov_tensor_legacy_name(out->input_value(0).get_tensor()); - out_node.append_attribute("name").set_value(name.c_str()); - } - xml_doc.save(stream); - }; - - ov::pass::StreamSerialize serializer(_ostream, serializeInfo, _cache_encrypt); - serializer.run_on_model(std::const_pointer_cast(model->clone())); -} - -ModelDeserializer::ModelDeserializer(std::istream & istream, model_builder fn, cache_decrypt decrypt_fn) - : _istream(istream) - , _model_builder(std::move(fn)) - , _cache_decrypt(std::move(decrypt_fn)) { -} - -void ModelDeserializer::operator>>(std::shared_ptr& model) { - using namespace ov::pass; - - std::string xmlString; - ov::Tensor dataBlob; - - // get file size before seek content - // blob from cache may have other header, skip it - const size_t _pos = _istream.tellg(); - _istream.seekg(0, _istream.end); - const size_t file_size = _istream.tellg(); - _istream.seekg(_pos, _istream.beg); - - StreamSerialize::DataHeader hdr = {}; - _istream.read(reinterpret_cast(&hdr), sizeof hdr); - - // check if model header contains valid data - bool isValidModel = (hdr.custom_data_offset == sizeof(hdr) + _pos) && - (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && - (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && - (hdr.model_size = file_size - hdr.model_offset); - if (!isValidModel) { - OPENVINO_THROW("Failed to read CPU device xml header"); - } - // read model input/output precisions - _istream.seekg(hdr.custom_data_offset); - - pugi::xml_document xmlInOutDoc; - if (hdr.custom_data_size > 0) { - std::string xmlInOutString; - xmlInOutString.resize(hdr.custom_data_size); - _istream.read(const_cast(xmlInOutString.c_str()), hdr.custom_data_size); - auto res = xmlInOutDoc.load_string(xmlInOutString.c_str()); - if (res.status != pugi::status_ok) { - OPENVINO_THROW("NetworkNotRead: The inputs and outputs information is invalid."); - } - } - - // read blob content - 
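(Editorial aside.) The custom-data section parsed just above is a tiny `cnndata` XML that preserves legacy output tensor names across export/import; the replacement `ModelSerializer`/`ModelDeserializer` introduced later in this patch keep the same format. A standalone pugixml sketch of writing and reading that section, with hypothetical output names:

```cpp
// Round-trips the "cnndata" custom-data XML used to keep legacy output names.
#include <iostream>
#include <sstream>
#include <pugixml.hpp>

int main() {
    // Write: one <out name="..."/> per model output, as the serializer does.
    pugi::xml_document doc;
    pugi::xml_node outputs = doc.append_child("cnndata").append_child("outputs");
    for (const char* name : {"prob", "boxes"})
        outputs.append_child("out").append_attribute("name").set_value(name);
    std::stringstream stream;
    doc.save(stream);

    // Read back: restore the names in output order, as the deserializer does.
    pugi::xml_document parsed;
    parsed.load_string(stream.str().c_str());
    for (pugi::xml_node out : parsed.child("cnndata").child("outputs").children("out"))
        std::cout << out.attribute("name").value() << "\n";  // prob, boxes
    return 0;
}
```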
_istream.seekg(hdr.consts_offset); - if (hdr.consts_size) { - dataBlob = ov::Tensor(ov::element::u8, ov::Shape({hdr.consts_size})); - _istream.read(static_cast(dataBlob.data(ov::element::u8)), hdr.consts_size); - } - - // read XML content - _istream.seekg(hdr.model_offset); - xmlString.resize(hdr.model_size); - _istream.read(const_cast(xmlString.c_str()), hdr.model_size); - if (_cache_decrypt) { - xmlString = _cache_decrypt(xmlString); - } - - model = _model_builder(xmlString, std::move(dataBlob)); - - // Set Info - pugi::xml_node root = xmlInOutDoc.child("cnndata"); - setInfo(root, model); -} - -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/serialize.h b/src/plugins/intel_cpu/src/serialize.h deleted file mode 100644 index b364c428419c96..00000000000000 --- a/src/plugins/intel_cpu/src/serialize.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// -#pragma once - -#include -#include -#include -#include - -#include "openvino/core/model.hpp" -#include "openvino/runtime/tensor.hpp" - -namespace ov { -namespace intel_cpu { - -class ModelSerializer { -public: - typedef std::function cache_encrypt; - ModelSerializer(std::ostream& ostream, cache_encrypt encrypt_fn = {}); - void operator<<(const std::shared_ptr& model); - -private: - std::ostream& _ostream; - cache_encrypt _cache_encrypt; -}; - -class ModelDeserializer { -public: - typedef std::function(const std::string&, const ov::Tensor&)> model_builder; - typedef std::function cache_decrypt; - ModelDeserializer(std::istream& istream, model_builder fn, cache_decrypt decrypt_fn = {}); - void operator>>(std::shared_ptr& model); - -private: - std::istream& _istream; - model_builder _model_builder; - cache_decrypt _cache_decrypt; -}; - -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/codec_xor.cpp b/src/plugins/intel_cpu/src/utils/codec_xor.cpp new file mode 100644 index 00000000000000..06061fc704e228 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/codec_xor.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils/codec_xor.hpp" +#include "openvino/core/parallel.hpp" + +namespace ov { +namespace intel_cpu { + +void codec_xor(char* dst_str, const char* src_str, size_t len) { + static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F}; + auto key_size = sizeof(codec_key); + + if (dst_str == src_str) { + parallel_for(len, [&](size_t key_idx) { + dst_str[key_idx] ^= codec_key[key_idx % key_size]; + }); + } else { + parallel_for(len, [&](size_t key_idx) { + dst_str[key_idx] = src_str[key_idx] ^ codec_key[key_idx % key_size]; + }); + } +} + +std::string codec_xor_str(const std::string& source_str) { + std::string new_str(source_str); + codec_xor(&new_str[0], &new_str[0], new_str.size()); + return new_str; +} + +} // namespace intel_cpu +} // namespace ov. 
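(Editorial aside.) A property worth spelling out about `codec_xor`: XOR with a fixed repeating key is an involution, which is why one routine serves as both the default `cacheEncrypt` and `cacheDecrypt`. A sequential sketch with the same key bytes, minus `ov::parallel_for`; note this is obfuscation, not cryptography:

```cpp
// Sequential model of codec_xor: applying it twice restores the input.
#include <cassert>
#include <cstddef>
#include <string>

static std::string xor_codec(const std::string& src) {
    static const char key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08,
                               0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F};
    std::string dst(src);
    for (size_t i = 0; i < dst.size(); ++i)
        dst[i] ^= key[i % sizeof(key)];  // repeating-key XOR
    return dst;
}

int main() {
    const std::string xml = "<net name=\"m\" version=\"11\"/>";
    const std::string encoded = xor_codec(xml);
    assert(encoded != xml);              // scrambled, but not strongly encrypted
    assert(xor_codec(encoded) == xml);   // involution: encode == decode
    return 0;
}
```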
diff --git a/src/plugins/intel_cpu/src/utils/codec_xor.hpp b/src/plugins/intel_cpu/src/utils/codec_xor.hpp new file mode 100644 index 00000000000000..d99a6d0d52bc37 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/codec_xor.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include +#include + +namespace ov { +namespace intel_cpu { + +void codec_xor(char* dst_str, const char* src_str, size_t len); + +std::string codec_xor_str(const std::string& source_str); + +typedef std::function CacheDecryptStr; +typedef std::function CacheDecryptChar; + +union CacheDecrypt { + CacheDecryptChar m_decrypt_char = nullptr; + CacheDecryptStr m_decrypt_str; + + CacheDecrypt() {} + + CacheDecrypt(CacheDecryptStr fn) : m_decrypt_str(fn) {} + + CacheDecrypt(CacheDecryptChar fn) : m_decrypt_char(fn) {} + + ~CacheDecrypt() {} + + operator bool() { + return m_decrypt_char || m_decrypt_str; + } +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp new file mode 100644 index 00000000000000..6666d42fb4f586 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -0,0 +1,198 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "serialize.hpp" + +#include "openvino/core/descriptor_tensor.hpp" +#include "openvino/core/parallel.hpp" +#include "openvino/runtime/shared_buffer.hpp" + +namespace ov { +namespace intel_cpu { + +////////// ModelSerializer ////////// + +ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn) + : m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {} + +void ModelSerializer::operator<<(const std::shared_ptr& model) { + auto serialize_info = [&](std::ostream& stream) { + pugi::xml_document xml_doc; + pugi::xml_node root = xml_doc.append_child("cnndata"); + pugi::xml_node outputs = root.append_child("outputs"); + for (const auto& out : model->get_results()) { + auto out_node = outputs.append_child("out"); + const auto name = ov::descriptor::get_ov_tensor_legacy_name(out->input_value(0).get_tensor()); + out_node.append_attribute("name").set_value(name.c_str()); + } + xml_doc.save(stream); + }; + + ov::pass::StreamSerialize serializer(m_ostream, serialize_info, m_cache_encrypt); + serializer.run_on_model(std::const_pointer_cast(model->clone())); +} + +////////// ModelDeserializer ////////// + +ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string) + : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string) { + if (m_decript_from_string) { + m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; + } else { + m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; + } + } + +void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) { + pugi::xml_node outputs = root.child("outputs"); + auto nodes_it = outputs.children("out").begin(); + size_t size = model->outputs().size(); + for (size_t i = 0lu; i < size; ++nodes_it, i++) { + std::string name = nodes_it->attribute("name").value(); + if (name.empty()) + continue; + auto result = model->output(i).get_node_shared_ptr(); + ov::descriptor::set_ov_tensor_legacy_name(result->input_value(0).get_tensor(), name); + } +} + +void ModelDeserializer::operator>>(std::shared_ptr& model) { + if (auto mmap_stream = dynamic_cast(&m_istream)) 
{ + process_mmap(model, mmap_stream->m_memory); + } else { + process_stream(model); + } +} + +void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model, + const std::shared_ptr<ov::MappedMemory>& mmemory) { + // Note: Do not use seekg with a memory-mapped stream; it may hurt performance for some models. + // Get the file size before seeking content. + // A blob from the cache may carry an additional header, so skip it. + auto buffer_base = mmemory->data(); + const auto file_size = mmemory->size(); + const size_t hdr_pos = m_istream.tellg(); + + pass::StreamSerialize::DataHeader hdr = {}; + std::memcpy(reinterpret_cast<char*>(&hdr), buffer_base + hdr_pos, sizeof hdr); + + // Check if the model header contains valid data. + bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr) + hdr_pos) && + (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && + (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && + (hdr.model_size = file_size - hdr.model_offset); + if (!is_valid_model) { + OPENVINO_THROW("[CPU] Could not deserialize by device xml header."); + } + + // Read model input/output precisions. + pugi::xml_document xml_in_out_doc; + if (hdr.custom_data_size > 0lu) { + auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8); + if (res.status != pugi::status_ok) { + OPENVINO_THROW("[CPU] Could not deserialize custom data."); + } + } + + // Map blob content + std::shared_ptr<ov::AlignedBuffer> weights_buf; + if (hdr.consts_size) { + weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(buffer_base + hdr.consts_offset, + hdr.consts_size, + mmemory); + } + + // XML content + auto xml_buff = std::make_shared<std::string>(); + if (m_cache_decrypt) { + if (m_decript_from_string) { + xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); + *xml_buff = m_cache_decrypt.m_decrypt_str(*xml_buff); + } else { + xml_buff->reserve(hdr.model_size + 1); + m_cache_decrypt.m_decrypt_char(&((*xml_buff)[0]), buffer_base + hdr.model_offset, hdr.model_size); + } + } else { + xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); + } + std::shared_ptr<ov::AlignedBuffer> model_buf = + std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]), + hdr.model_size, + xml_buff); + + model = m_model_builder(model_buf, weights_buf); + + // Set Info + pugi::xml_node root = xml_in_out_doc.child("cnndata"); + set_info(root, model); +} + +void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) { + const size_t hdr_pos = m_istream.tellg(); + m_istream.seekg(0, m_istream.end); + const size_t file_size = m_istream.tellg(); + m_istream.seekg(hdr_pos, m_istream.beg); + + pass::StreamSerialize::DataHeader hdr = {}; + m_istream.read(reinterpret_cast<char*>(&hdr), sizeof hdr); + + // Check if the model header contains valid data. 
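(Editorial aside.) The validity check that follows, like the one in `process_mmap` above, verifies that the blob sections sit back to back: `[DataHeader][custom data][consts][model xml]`. A standalone sketch of that layout; the struct below only mirrors how the fields are used in this patch, and it shows a strict `==` for the last clause where the in-tree check assigns `model_size` from the file size (`=`), deriving it rather than verifying it:

```cpp
// Models the cache-blob layout and its consistency check.
#include <cassert>
#include <cstddef>

struct DataHeader {
    size_t custom_data_offset, custom_data_size;
    size_t consts_offset, consts_size;
    size_t model_offset, model_size;
};

static DataHeader make_header(size_t hdr_pos, size_t custom, size_t consts, size_t model) {
    DataHeader h{};
    h.custom_data_offset = hdr_pos + sizeof(DataHeader);  // custom data follows the header
    h.custom_data_size = custom;
    h.consts_offset = h.custom_data_offset + custom;      // consts follow custom data
    h.consts_size = consts;
    h.model_offset = h.consts_offset + consts;            // model xml follows consts
    h.model_size = model;
    return h;
}

// Every section must start where the previous one ends; the model runs to EOF.
static bool is_valid(const DataHeader& h, size_t hdr_pos, size_t file_size) {
    return h.custom_data_offset == hdr_pos + sizeof(DataHeader) &&
           h.custom_data_size == h.consts_offset - h.custom_data_offset &&
           h.consts_size == h.model_offset - h.consts_offset &&
           h.model_size == file_size - h.model_offset;
}

int main() {
    const size_t hdr_pos = 16;  // e.g. a cache-specific prefix before the blob
    DataHeader h = make_header(hdr_pos, 128, 4096, 2048);
    assert(is_valid(h, hdr_pos, hdr_pos + sizeof(DataHeader) + 128 + 4096 + 2048));
    assert(!is_valid(h, hdr_pos, /*truncated file*/ 1000));
    return 0;
}
```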
+ bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr) + hdr_pos) && + (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && + (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && + (hdr.model_size = file_size - hdr.model_offset); + if (!is_valid_model) { + OPENVINO_THROW("[CPU] Could not deserialize by device xml header."); + } + + // read model input/output precisions + m_istream.seekg(hdr.custom_data_offset); + + pugi::xml_document xmlInOutDoc; + if (hdr.custom_data_size > 0) { + std::string xmlInOutString; + xmlInOutString.resize(hdr.custom_data_size); + m_istream.read(const_cast(xmlInOutString.c_str()), hdr.custom_data_size); + auto res = xmlInOutDoc.load_string(xmlInOutString.c_str()); + if (res.status != pugi::status_ok) { + OPENVINO_THROW("NetworkNotRead: The inputs and outputs information is invalid."); + } + } + + // read blob content + auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); + m_istream.seekg(hdr.consts_offset); + if (hdr.consts_size) { + m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); + } + + // read XML content + auto xml_string = std::make_shared(); + m_istream.seekg(hdr.model_offset); + xml_string->resize(hdr.model_size); + m_istream.read(const_cast(xml_string->data()), hdr.model_size); + if (m_cache_decrypt) { + if (m_decript_from_string) { + *xml_string = m_cache_decrypt.m_decrypt_str(*xml_string); + } else { + m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), xml_string->data(), xml_string->size()); + } + } + + auto model_buf = std::make_shared>>(const_cast(xml_string->data()), + xml_string->size(), + xml_string); + auto weights_buf = std::make_shared>>(reinterpret_cast(data_blob->data(ov::element::u8)), + hdr.consts_size, + data_blob); + + model = m_model_builder(model_buf, weights_buf); + + // Set Info + pugi::xml_node root = xmlInOutDoc.child("cnndata"); + set_info(root, model); +} + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp new file mode 100644 index 00000000000000..817041452c9597 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include + +#include "openvino/core/model.hpp" +#include "openvino/pass/serialize.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/util/mmap_object.hpp" +#include "utils/codec_xor.hpp" + +namespace ov { +namespace intel_cpu { + +class ModelSerializer { +public: + typedef std::function CacheEncrypt; + + ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn = {}); + + void operator<<(const std::shared_ptr& model); + +private: + std::ostream& m_ostream; + CacheEncrypt m_cache_encrypt; +}; + +class ModelDeserializer { +public: + typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; + + ModelDeserializer(std::istream& model, ModelBuilder fn, const CacheDecrypt& encrypt_fn, bool decript_from_string); + + virtual ~ModelDeserializer() = default; + + void operator>>(std::shared_ptr& model); + +protected: + static void set_info(pugi::xml_node& root, std::shared_ptr& model); + + void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory); + + void process_stream(std::shared_ptr& model); + + std::istream& m_istream; + ModelBuilder m_model_builder; + CacheDecrypt m_cache_decrypt; + bool m_decript_from_string; +}; + 
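(Editorial aside.) To see how these classes compose: a hypothetical round trip through a string stream, wired the same way `Plugin::import_model` above wires the deserializer. This is a sketch only; it assumes the plugin-internal headers are visible and a real `ICore` is at hand:

```cpp
// Hypothetical export/import round trip inside the CPU plugin (sketch only).
#include <sstream>
#include "openvino/runtime/icore.hpp"
#include "utils/serialize.hpp"

std::shared_ptr<ov::Model> round_trip(const std::shared_ptr<ov::Model>& model,
                                      const std::shared_ptr<ov::ICore>& core) {
    std::stringstream blob;

    // Export: string-based XOR codec, matching the plugin's default cacheEncrypt.
    ov::intel_cpu::ModelSerializer serializer(blob, ov::intel_cpu::codec_xor_str);
    serializer << model;

    // Import: default char-based decrypt path (decript_from_string == false),
    // building the model through the new buffer-based ICore::read_model overload.
    ov::intel_cpu::ModelDeserializer deserializer(
        blob,
        [&core](const std::shared_ptr<ov::AlignedBuffer>& xml,
                const std::shared_ptr<ov::AlignedBuffer>& weights) {
            return core->read_model(xml, weights);
        },
        ov::intel_cpu::CacheDecrypt{ov::intel_cpu::codec_xor},
        /*decript_from_string=*/false);

    std::shared_ptr<ov::Model> restored;
    deserializer >> restored;
    return restored;
}
```

With a string stream there is no `MmapStream` underneath, so this exercises the `process_stream` path; reading the same blob from a memory-mapped cache file would take the `process_mmap` path instead.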
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp
index e563a89e3106ec..0f25351a020f60 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp
@@ -192,6 +192,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType
           (activation_type == utils::ActivationTypes::GeluTanh) ||
           (activation_type == utils::ActivationTypes::Relu) ||
           (activation_type == utils::ActivationTypes::Sigmoid) ||
+          (activation_type == utils::ActivationTypes::SoftSign) ||
           (activation_type == utils::ActivationTypes::Swish) ||
           (activation_type == utils::ActivationTypes::LogicalNot) ||
           (activation_type == utils::ActivationTypes::Tanh))) {
diff --git a/src/plugins/intel_gpu/src/graph/gemm.cpp b/src/plugins/intel_gpu/src/graph/gemm.cpp
index a8b196bd45885f..25007cb93b18d5 100644
--- a/src/plugins/intel_gpu/src/graph/gemm.cpp
+++ b/src/plugins/intel_gpu/src/graph/gemm.cpp
@@ -229,7 +229,8 @@ layout gemm_inst::transform_output_layout(const std::shared_ptr<gemm> prim
                             (i == 1) ? transposed_input1_pshape : input_layouts[i].get_partial_shape();
 
         for (size_t j = 0; j != input_pshape.size(); ++j) {
-            ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]);
+            if (input_pshape[j].get_max_length() != input_pshape[j].get_min_length())
+                ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]);
         }
     }
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
index b0c6758af7d909..b7017c414c505f 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
@@ -274,6 +274,9 @@ void concat_in_place_optimization::optimize_cascade(concatenation_node& node, st
     }
     node.set_output_layout(concat_layout);
     node.can_be_optimized(true);
+    if (node.is_dynamic()) {
+        node.set_runtime_skippable(true);
+    }
     GPU_DEBUG_TRACE_DETAIL << "[prepare_buffer_fusing] : " << node.id() << " can be optimized" << std::endl;
 }
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index c51b34d81cf153..13634b49fd9d96 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -502,7 +502,7 @@ event::ptr primitive_inst::realloc_if_needed() {
     event::ptr ev = nullptr;
     const auto& users = get_user_insts();
 
-    if (users.size() == 1 && users.front()->get_node().is_type<concatenation>()) {
+    if (users.size() == 1 && users.front()->get_node().is_type<concatenation>() && users.front()->get_node().is_runtime_skippable()) {
         auto concat_inst = users.front();
         if (concat_inst->can_be_optimized()) {
             if (!concat_inst->allocation_done_by_other) {
@@ -656,7 +656,7 @@ event::ptr primitive_inst::realloc_if_needed() {
     }
 
     // Clear out memory if if was previously reused, but now primitive can't be optimized
-    if (_node->is_runtime_skippable() || _node->is_type<crop>()) {
+    if (!_node->is_type<concatenation>() && (_node->is_runtime_skippable() || _node->is_type<crop>())) {
         if (can_be_optimized()) {
             _max_output_layout_count = _deps[0].first->_max_output_layout_count;
             GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO("can_be_optimized");
@@ -1351,7 +1351,8 @@ void primitive_inst::do_runtime_in_place_concat() {
     if (get_users().size() != 1)
         return;
 
     auto concat_inst = get_user_insts().front();
-    if (!concat_inst->get_node().is_type<concatenation>() || !concat_inst->get_node().can_be_optimized())
+
+    if (!concat_inst->get_node().is_type<concatenation>() ||
+        !(concat_inst->get_node().can_be_optimized() && concat_inst->get_node().is_runtime_skippable()))
         return;
 
     if (has_subgraph_dependency(concat_inst->dependencies())) {
diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp
index 21ba4e656fae0d..fc9648b90e444c 100644
--- a/src/plugins/intel_gpu/src/graph/program_node.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_node.cpp
@@ -1548,6 +1548,18 @@ void program_node::create_onednn_primitive_attributes(
             mem_desc.get_dims(), mem_desc.get_data_type());
     } else if (is_type<gemm>()) {
         size_t rank = cldnn::format::dimension(in.format);
+        auto in_pshape = in.get_partial_shape();
+        auto out_pshape = get_output_layout().get_partial_shape();
+        size_t ones_to_add = std::max(out_pshape.size(), static_cast<size_t>(rank)) - in_pshape.size();
+        if (ones_to_add > 0) {
+            layout new_layout = in;
+            ov::PartialShape new_input_pshape;
+            std::vector<ov::Dimension> dims(in_pshape.begin(), in_pshape.begin() + in_pshape.size());
+            new_input_pshape = ov::PartialShape(dims);
+            new_input_pshape.insert(new_input_pshape.begin(), ones_to_add, 1ul);
+            new_layout.set_partial_shape(new_input_pshape);
+            in = new_layout;
+        }
         size_t in_batched_size = in.count() / (in.spatial(0) * in.spatial(1));
         dnnl::memory::dims dims = onednn::convert_gemm_tensor(in.get_tensor(), rank, in_batched_size == 1);
         dnnl::memory::data_type dt = onednn::convert_data_type(in.data_type);
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp
index acdd3bf84e5224..5f5dea544a66bb 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp
@@ -155,6 +155,19 @@ static inline std::vector<std::string> GetOrder(size_t size) {
     return idx_order;
 }
 
+static inline std::vector<std::string> GetFinalIndexOrder(size_t size) {
+    std::vector<std::string> idx_order;
+
+    OPENVINO_ASSERT(size > 4, "[GPU] Only support 5 or 6 dimensions");
+
+    if (size == 5) {
+        idx_order = {"b", "f", "0", "z", "0"};
+    } else if (size == 6) {
+        idx_order = {"b", "f", "0", "w", "z", "0"};
+    }
+    return idx_order;
+}
+
 static std::string GetDictionaryIndexOrder(const gather_params& params, size_t axis) {
     auto idx_order = GetOrder(params.outputs[0].GetDims().size());
     auto input_axis_index_macro = "INPUT_AXIS_INDEX";
@@ -181,21 +194,27 @@ static std::string GetDictionaryIndexOrder(const gather_params& params, size_t axis)
 }
 
 static std::string GetIndicesIdxOrder(const gather_params& params, size_t axis, int64_t batch_dim) {
-    std::vector<std::string> idx_order = GetOrder(params.outputs[0].GetDims().size());
-    auto zero_val = "0";
+    std::vector<std::string> idx_order;
 
-    size_t indices_dims_num = GetNonEmptyDimsNumber(params.inputs[1]);
+    if ((axis == (size_t)batch_dim) && (axis > 1) && (params.inputs[1].GetDims().size() > 4)) {
+        idx_order = GetFinalIndexOrder(params.outputs[0].GetDims().size());
+    } else {
+        idx_order = GetOrder(params.outputs[0].GetDims().size());
+        auto zero_val = "0";
 
-    // Shift indices of Gather indices input related to output dims
-    for (size_t i = batch_dim; i < indices_dims_num; i++)
-        idx_order[i] = idx_order[axis + i - batch_dim];
+        size_t indices_dims_num = GetNonEmptyDimsNumber(params.inputs[1]);
 
-    for (size_t i = indices_dims_num; i < idx_order.size(); i++)
-        idx_order[i] = zero_val;
+        // Shift indices of Gather indices input related to output dims
+        for (size_t i = batch_dim; i < indices_dims_num; i++)
+            idx_order[i] = idx_order[axis + i - batch_dim];
 
-    // Fix size to inputs[1] dims size
-    for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[1].GetDims().size(); i++)
-        idx_order.pop_back();
+        for (size_t i = indices_dims_num; i < idx_order.size(); i++)
+            idx_order[i] = zero_val;
+
+        // Fix size to inputs[1] dims size
+        for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[1].GetDims().size(); i++)
+            idx_order.pop_back();
+    }
 
     return GetOrderString(idx_order);
 }
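Note (illustration only): for the new `axis == batch_dim` path the indices order collapses to a fixed pattern per rank. A tiny standalone mock of `GetFinalIndexOrder`'s visible behavior — the function name and `main` driver are invented for the demo, not kernel-selector API:

```cpp
#include <cassert>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Mirrors the 5-D/6-D cases added in the hunk above.
std::vector<std::string> final_index_order(size_t size) {
    assert(size == 5 || size == 6);  // the patched helper only fills these ranks
    return size == 5 ? std::vector<std::string>{"b", "f", "0", "z", "0"}
                     : std::vector<std::string>{"b", "f", "0", "w", "z", "0"};
}

int main() {
    for (size_t rank = 5; rank <= 6; ++rank) {
        for (const auto& d : final_index_order(rank))
            std::cout << d << ' ';                  // prints: b f 0 z 0 / b f 0 w z 0
        std::cout << '\n';
    }
}
```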
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
index adaa572878bff4..51f66f3abb7bfe 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
@@ -1577,6 +1577,76 @@ class gemm_gpu_tests: public ::testing::Test {
             ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i;
         }
     }
+
+    void test_dynamic_static_broadcast_3dim(std::vector<size_t> BMKN, bool is_caching_test, const double abs_error = 0.0001) {
+        tests::random_generator rg;
+        rg.set_seed(GET_SUITE_NAME);
+
+        auto& engine = get_test_engine();
+        cldnn::layout input0_layout;
+        cldnn::layout input1_layout;
+
+        std::vector<int64_t> input0_order = {0, 1, 2};
+        std::vector<int64_t> input1_order = {0, 1, 2};
+        std::vector<int64_t> output_order = {0, 1, 2};
+
+        size_t BATCH_SIZE = BMKN[0];
+        size_t M_SIZE = BMKN[1];
+        size_t K_SIZE = BMKN[2];
+        size_t N_SIZE = BMKN[3];
+
+        ov::Shape input0_shape = { BATCH_SIZE, M_SIZE, K_SIZE };
+        ov::Shape input1_shape = { 1, K_SIZE, N_SIZE };
+        ov::Shape output_shape = { BATCH_SIZE, M_SIZE, N_SIZE };
+
+        input0_layout = layout{ov::PartialShape::dynamic(input0_shape.size()), data_types::f16, format::bfyx};
+        input1_layout = layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx};
+
+        auto input0_mem = engine.allocate_memory(layout{ov::PartialShape(input0_shape), data_types::f16, format::bfyx});
+        auto input1_mem = engine.allocate_memory(layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx});
+
+        auto input_0_data = rg.generate_random_1d<ov::float16>(ov::shape_size(input0_shape), -2, 2);
+        auto input_1_data = rg.generate_random_1d<ov::float16>(ov::shape_size(input1_shape), -2, 2);
+
+        set_values(input0_mem, input_0_data);
+        set_values(input1_mem, input_1_data);
+
+        topology topology;
+        topology.add(input_layout("input0", input0_layout),
+                     input_layout("input1", input1_layout),
+                     gemm("gemm", { input_info("input0"), input_info("input1") }, data_types::f16, input0_order, input1_order, output_order)
+        );
+
+        ExecutionConfig config = get_test_default_config(engine);
+        config.set_property(ov::intel_gpu::optimize_data(true));
+        config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+        network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
+        network->set_input_data("input0", input0_mem);
+        network->set_input_data("input1", input1_mem);
+
+        auto outputs = network->execute();
+
+        auto output_mem = outputs.at("gemm").get_memory();
+        cldnn::mem_lock<ov::float16> output_ptr(output_mem, get_test_stream());
+
+        std::vector<ov::float16> ref_out_data;
+        ref_out_data.resize(ov::shape_size(output_shape));
+
+        ov::reference::matmul(input_0_data.data(),
+                              input_1_data.data(),
+                              ref_out_data.data(),
+                              input0_shape,
+                              input1_shape,
+                              output_shape,
+                              false,
+                              false);
+
+        ASSERT_EQ(output_ptr.size(), ref_out_data.size());
+
+        for (uint32_t i = 0; i < ref_out_data.size(); ++i) {
+            ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i;
+        }
+    }
 };
 
 TEST_F(gemm_gpu_tests, basic_bfyx_t2_inplace_crop_with_pad) {
@@ -1710,6 +1780,10 @@ TEST_F(gemm_gpu_tests, transpose_matmul_static_4d_f32_n_tile_32_input1_ylast) {
     this->test_transpose_matmul_f32(4, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 1, 2, 3}, /*input1_order*/{0, 1, 3, 2});
 }
 
+TEST_F(gemm_gpu_tests, test_dynamic_static_broadcast_3dim) {
+    this->test_dynamic_static_broadcast_3dim(/*BMKN*/{2, 16, 2, 2}, false);
+}
+
 TEST_F(gemm_gpu_tests, transpose_matmul_in0_indirect) {
     this->test_transpose_indirect(false, true, false);
 }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 1c75b1cbdf3211..2fe90eb82c41bb 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -442,13 +442,14 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
 }
 
 void ov::npuw::CompiledModel::finalize_weights_bank() {
+    LOG_INFO("Finalizing weights bank...");
     // Register lazy tensors
     for (std::size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
         auto& comp_model_desc = m_compiled_submodels[idx];
 
         // Skip optimized out and non-functions
         if (!comp_model_desc.compiled_model && !comp_model_desc.replaced_by) {
-            return;
+            continue;
         }
 
         const auto real_idx = comp_model_desc.replaced_by.value_or(idx);
@@ -489,6 +490,45 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
             comp_model_desc.is_remote[tidx] = m_weights_bank->is_remote(lt);
         }
     }
+
+    LOG_INFO("Done.");
+}
+
+std::string ov::npuw::CompiledModel::global_mem_device() const {
+    // Force globally set device if set
+    const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>();
+    if (!device_alloc.empty()) {
+        return device_alloc;
+    }
+
+    // Check if there is at least 1 NPU submodel
+    for (std::size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
+        auto& comp_model_desc = m_compiled_submodels[idx];
+        if (!comp_model_desc.compiled_model) {
+            continue;
+        }
+        if (ov::npuw::util::starts_with(*comp_model_desc.device_it, "NPU")) {
+            return "NPU";
+        }
+    }
+
+    return "CPU";
+}
+
+std::string ov::npuw::CompiledModel::funcall_mem_device(const std::size_t idx) const {
+    // FIXME: currently we allocate intermediate tensors for EVERY submodel.
+    // It's not feasible to allocate them in L0 due to high memory consumption.
+    // Until we make such memory reusable, hard-coding those tensors to CPU.
+ return "CPU"; + + // Force globally set device if set + const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>(); + if (!device_alloc.empty()) { + return device_alloc; + } + + auto& comp_model_desc = m_compiled_submodels[idx]; + return *comp_model_desc.device_it; } void ov::npuw::CompiledModel::remove_long_output_names(const std::shared_ptr& model) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp index 7f308d46094f35..4152d08275ba6d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp @@ -74,6 +74,10 @@ class CompiledModel : public ov::ICompiledModel { void finalize_weights_bank(); + std::string global_mem_device() const; + + std::string funcall_mem_device(const std::size_t idx) const; + std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc; ::intel_npu::Config m_cfg; GetPropertiesMap m_prop_to_opt; diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp index 93f9f12fe86048..fbbabf083bccd8 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp @@ -16,7 +16,6 @@ #include "openvino/core/except.hpp" #include "openvino/core/parallel.hpp" #include "openvino/runtime/iasync_infer_request.hpp" -#include "openvino/runtime/make_tensor.hpp" #include "plugin.hpp" #include "util.hpp" #include "weights_bank.hpp" @@ -49,7 +48,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrm_compiled_submodels[real_idx]; auto& proto_comp_model = proto_comp_model_desc.compiled_model; @@ -68,13 +67,13 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrparams) { const auto& iport = proto_comp_model_desc.compiled_model->inputs()[p.idx]; m_spatial_io[real_idx].input_tails[p.idx] = - ov::get_tensor_impl(ov::Tensor(iport.get_element_type(), iport.get_shape())); + allocTensor(iport, m_npuw_model->funcall_mem_device(real_idx)); } const auto num_outs = proto_comp_model_desc.compiled_model->outputs().size(); for (std::size_t out_idx = 0u; out_idx < num_outs; out_idx++) { const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx]; m_spatial_io[real_idx].output_tails[out_idx] = - ov::get_tensor_impl(ov::Tensor(oport.get_element_type(), oport.get_shape())); + allocTensor(oport, m_npuw_model->funcall_mem_device(real_idx)); } } } // if(spatial) @@ -88,7 +87,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrout_dim] = proto_comp_model_desc.spatial->range; } m_funcall_result[LinkFrom{i, out_idx}] = - ov::get_tensor_impl(ov::Tensor(port.get_element_type(), shape)); + allocTensor(port.get_element_type(), shape, m_npuw_model->funcall_mem_device(real_idx)); } if (real_idx != i) { // If this function call is NOT the function body, do nothing here - the original @@ -153,7 +152,9 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrinputs().size(); i++) { const auto& port = m_npuw_model->inputs()[i]; - m_input_tensors.push_back(ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape()))); + ov::SoPtr allocated = allocTensor(port, m_npuw_model->global_mem_device()); + m_input_tensors.push_back(allocated); + m_input_allocated.insert(allocated->data()); m_port_to_tensor[port] = TensorStorage{m_input_tensors.back(), true}; } // for(inputs) @@ -173,7 +174,7 @@ 
         auto tensor = funcall_result_iter != m_funcall_result.end()
                           ? funcall_result_iter->second  // Function calls have their tensors allocated, so just use one
-                          : ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape()));
+                          : allocTensor(port, m_npuw_model->global_mem_device());
 
         m_output_tensors.push_back(tensor);
         m_port_to_tensor[port] = TensorStorage{tensor, true};
@@ -421,7 +422,7 @@ void ov::npuw::JustInferRequest::bind_global_parameters(std::size_t idx) {
             LOG_BLOCK();
             if (!is_spatial_param(sub_in_idx)) {
                 // Input parameter is non-spatial, do normal handling
-                if (do_copy) {
+                if (do_copy || m_input_allocated.count(g_tnsr->data()) == 0) {
                     LOG_DEBUG("Will be copied");
                     copy_list.emplace_back(g_tnsr, s_port);
                 } else {
@@ -919,6 +920,29 @@ void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool
     }  // if (replaced_by)
 }
 
+ov::SoPtr<ov::ITensor> ov::npuw::JustInferRequest::allocTensor(const ov::element::Type type,
+                                                               const ov::Shape& shape,
+                                                               const std::string& device) {
+    if (device == "CPU" || ov::shape_size(shape) == 0) {
+        return ov::get_tensor_impl(ov::Tensor(type, shape));
+    }
+
+    ov::SoPtr<ov::IRemoteTensor> remote_tensor;
+    ov::Tensor allocated_tensor;
+    {
+        std::lock_guard<std::mutex> guard(m_alloc_mutex);
+        m_remote_ctx = m_npuw_model->get_plugin()->get_core()->get_default_context(device)._ptr;
+        remote_tensor = m_remote_ctx->create_host_tensor(type, shape);
+        allocated_tensor = ov::make_tensor(remote_tensor);
+    }
+    return ov::get_tensor_impl(allocated_tensor);
+}
+
+ov::SoPtr<ov::ITensor> ov::npuw::JustInferRequest::allocTensor(const ov::Output<const ov::Node>& node,
+                                                               const std::string& device) {
+    return allocTensor(node.get_element_type(), node.get_shape(), device);
+}
+
 void ov::npuw::JustInferRequest::subscribe_subrequest(std::size_t idx, Completed cb) {
     get_real_subrequest(idx)->set_callback(std::move(cb));
 }
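Note (illustration only): `allocTensor` above chooses between a plain host tensor and a remote-context host tensor, and serializes only the allocation itself under `m_alloc_mutex`. A schematic standalone sketch of that control flow — `FakeTensor` and all names here are invented for the demo, no OpenVINO API is used:

```cpp
#include <cstddef>
#include <iostream>
#include <mutex>
#include <string>

std::mutex alloc_mutex;

// Stand-in for a tensor handle; only records which path produced it.
struct FakeTensor {
    std::string where;
    size_t elems;
};

FakeTensor alloc_tensor(size_t elems, const std::string& device) {
    if (device == "CPU" || elems == 0) {
        return {"regular-host", elems};  // cheap path, no driver involved
    }
    // Mirrors the hunk's structure: only allocation is locked (L0 allocation
    // may crash when run in parallel, per the FIXME); copies happen outside.
    std::lock_guard<std::mutex> guard(alloc_mutex);
    return {"remote-host:" + device, elems};
}

int main() {
    std::cout << alloc_tensor(16, "CPU").where << '\n';  // regular-host
    std::cout << alloc_tensor(16, "NPU").where << '\n';  // remote-host:NPU
}
```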
diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
index 2544647dd0066c..7335b54c30062e 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
@@ -6,10 +6,15 @@
 
 #include <functional>
 #include <map>
+#include <mutex>
 #include <vector>
 
 #include "base_sync_infer_request.hpp"
+#include "openvino/runtime/iplugin.hpp"
+#include "openvino/runtime/iremote_context.hpp"
+#include "openvino/runtime/make_tensor.hpp"
+#include "openvino/runtime/tensor.hpp"
 
 namespace ov {
 namespace npuw {
@@ -59,6 +64,9 @@ class JustInferRequest final : public IBaseInferRequest {
     void connect_subrequests();
     void recreate_subrequests(std::size_t idx);
 
+    ov::SoPtr<ov::ITensor> allocTensor(const ov::element::Type type, const ov::Shape& shape, const std::string& device);
+    ov::SoPtr<ov::ITensor> allocTensor(const ov::Output<const ov::Node>& node, const std::string& device);
+
     using LinkFrom = std::pair<std::size_t /* Subgraph idx */, std::size_t /* output idx */>;
     std::vector<GlobalIO> m_subrequests_gio;
+
+    std::mutex m_alloc_mutex;
+    std::shared_ptr<ov::IRemoteContext> m_remote_ctx = nullptr;
+
+    std::unordered_set<void*> m_input_allocated;
 };
 
 }  // namespace npuw
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp
index 4b8973b5bb94ae..46b6cb7b12681d 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp
@@ -74,8 +74,8 @@ std::vector<Avoid> getAvoids(::intel_npu::Config& cfg) {
     std::string avoids_opt = cfg.getString<::intel_npu::NPUW_ONLINE_AVOID>();
 
     if (avoids_opt.empty()) {
-        LOG_WARN(::intel_npu::NPUW_ONLINE_AVOID().key()
-                 << " property is not set! NPU device will be prioritized for every subgraph.");
+        LOG_VERB(::intel_npu::NPUW_ONLINE_AVOID().key()
+                 << " property is not set. NPU device will be prioritized for every subgraph.");
         return {};
     }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index 58a8219f497fed..5e3f12fedf68a6 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -345,6 +345,10 @@ void Partitioner::identifySubgraphs() {
     }
     LOG_INFO("Caching done: " << node_id_cache.size() << " layers.");
 
+    // Accumulate knowledge about known OV layers when walking
+    // over a topologically-sorted list.
+    std::unordered_set<std::shared_ptr<ov::Node>> nodes_known_now;
+
     // FIXME: Need to do some sanity checks here. What if partitioning
     // has been generated for another variation of this model?
     // What if that was a completely different model?
@@ -458,16 +462,19 @@ void Partitioner::identifySubgraphs() {
                 continue;
             } else if ((ov::is_type<ov::op::v8::Slice>(input_node) ||
                         ov::is_type<ov::op::v0::Convert>(input_node)) &&
+                       !nodes_known_now.count(input_node) &&
                        ov::op::util::is_parameter(input_node->input(0).get_source_output().get_node_shared_ptr())) {
                 // So the situation is:
-                // - a group has an input layer
+                // - a group has an input layer
                 // - which reads from a Slice or Convert
                 // - which reads from a Parameter
+                // - not a part of any prior group
                 // This happens when an offline plan is used with a kvcache
                 // model extended with slices to maintain zero-copy (LLM case)
                 auto extra_param = input_node->input(0).get_source_output().get_node_shared_ptr();
                 input_mapping[input_node] = extra_param;
                 extra_params.insert(extra_param);
+                LOG_DEBUG("Registered extra param " << extra_param);
             } else {
                 // Ok, this input is connected to some other node's output
                 // Replace this connection with a link to a newly created Parameter
@@ -671,7 +678,8 @@ void Partitioner::identifySubgraphs() {
             }
         }
         this_group_idx++;  // FIXME: indexed() is better!
-    }  // for (partitions)
+        nodes_known_now.insert(group_nodes.begin(), group_nodes.end());
+    }  // for (partitions)
 
     // Return what we've got here
     std::vector<ov::npuw::Subgraph>& result = P.subgraphs;
@@ -1387,14 +1395,16 @@ void Partitioner::matchParameters(const std::string& func_name) {
         this_model_nodes.insert(node_ptr.get());
     }
     for (auto&& node : call->get_ordered_ops()) {
+        using ov::npuw::util::at::_;
+
         if (ov::op::util::is_parameter(node)) {
             PKey pkey;
             for (auto&& iport : node->output(0).get_target_inputs()) {
                 if (this_model_nodes.count(iport.get_node()) > 0) {
                     LOG_DEBUG("Register link " << iport.get_node()->get_friendly_name() << " : " << iport.get_index());
-                    pkey.insert(
-                        PReader{layer_to_prototype.at(iport.get_node()->get_friendly_name()), iport.get_index()});
+                    pkey.insert(PReader{_(layer_to_prototype).at(iport.get_node()->get_friendly_name()),
+                                        iport.get_index()});
                 }
             }
             LOG_DEBUG("Find orig parameter for " << node);
diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp
index 1704314aee75ea..02d2c8c097811e 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -65,8 +65,9 @@ ov::Tensor permute(const ov::Tensor& t, const std::vector<std::size_t>& axes);
 ov::Tensor concat(const std::vector<ov::Tensor>& tt, std::size_t axis);
 
 namespace at {
-template <typename M>
+template <typename M_>
 struct Impl {
+    using M = typename std::decay<M_>::type;
     using V = typename M::mapped_type;
 
     M* m = nullptr;
@@ -96,6 +97,11 @@ Impl<M> _(M* pM) {
     return Impl<M>(pM);
 }
 
+template <typename M>
+Impl<M> _(M&& m) {
+    return Impl<M>(&m);
+}
+
 template <typename M>
 Impl<M> _(std::shared_ptr<M> pM) {
     return Impl<M>(pM.get());
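Note (illustration only): the `std::decay` addition is what lets `_(...)` accept a plain lvalue map, not just pointers and `shared_ptr`s. A self-contained mock of the mechanics — `At` here stands in for npuw's `Impl`, and only the decay trick is reproduced:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <type_traits>

template <typename M_>
struct At {
    using M = typename std::decay<M_>::type;  // strips &/&& so M is the plain map type
    M* m = nullptr;
    explicit At(M* pM) : m(pM) {}
    typename M::mapped_type& at(const typename M::key_type& k) { return m->at(k); }
};

// New-style overload: binds an lvalue map directly, no pointer juggling at call sites.
template <typename M>
At<M> _(M&& m) {
    return At<M>(&m);
}

int main() {
    std::map<std::string, int> proto{{"layer0", 42}};
    std::cout << _(proto).at("layer0") << "\n";  // 42
}
```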
diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
index d142f72f9b7126..2a79bf33ef9a53 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -95,13 +95,16 @@ ov::Tensor Bank::unsafe_eval_and_alloc(const LazyTensor& tensor, const std::string& device_for_alloc) {
         return transformed_tensor;
     }
 
-    // FIXME: L0 allocation may crash when run in parallel
-    std::lock_guard<std::mutex> guard(m_alloc_mutex);
-
-    m_remote_ctx = m_core->get_default_context(device_for_alloc)._ptr;
-    auto remote_tensor =
-        m_remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape());
-    auto allocated_tensor = ov::make_tensor(remote_tensor);
+    ov::SoPtr<ov::IRemoteTensor> remote_tensor;
+    ov::Tensor allocated_tensor;
+    {
+        // FIXME: L0 allocation may crash when run in parallel
+        std::lock_guard<std::mutex> guard(m_alloc_mutex);
+        m_remote_ctx = m_core->get_default_context(device_for_alloc)._ptr;
+        remote_tensor =
+            m_remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape());
+        allocated_tensor = ov::make_tensor(remote_tensor);
+    }
     transformed_tensor.copy_to(allocated_tensor);
     m_device_bank[device_for_alloc][tensor] = allocated_tensor;
     return allocated_tensor;
diff --git a/src/plugins/intel_npu/src/plugin/src/extension.cpp b/src/plugins/intel_npu/src/plugin/src/extension.cpp
new file mode 100644
index 00000000000000..9626ffbd5e909a
--- /dev/null
+++ b/src/plugins/intel_npu/src/plugin/src/extension.cpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/core/extension.hpp"
+
+#include "openvino/core/op_extension.hpp"
+#include "ov_ops/rms.hpp"
+
+#define OP_EXTENSION(NAME) std::make_shared<ov::OpExtension<NAME>>(),
+
+#define NPU_SUPPORTED_EXTENSIONS OP_EXTENSION(ov::op::internal::RMS)
+
+OPENVINO_CREATE_EXTENSIONS(std::vector<ov::Extension::Ptr>({NPU_SUPPORTED_EXTENSIONS}));
diff --git a/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp b/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp
index 521a8dc60e3fe0..367818ebbf9572 100644
--- a/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp
+++ b/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp
@@ -48,6 +48,10 @@ class MockICore : public ov::ICore {
                 (const));
     MOCK_METHOD(std::shared_ptr<ov::Model>, read_model, (const std::string&, const ov::Tensor&, bool), (const));
     MOCK_METHOD(std::shared_ptr<ov::Model>, read_model, (const std::string&, const std::string&), (const));
+    MOCK_METHOD(std::shared_ptr<ov::Model>,
+                read_model,
+                (const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&),
+                (const));
     MOCK_METHOD(ov::SoPtr<ov::IRemoteContext>, get_default_context, (const std::string&), (const));
     MOCK_METHOD(ov::SoPtr<ov::ICompiledModel>,
                 import_model,
diff --git a/tests/layer_tests/common/mo_convert_test_class.py b/tests/layer_tests/common/mo_convert_test_class.py
index e800e76ed98a88..6a57339cedf111 100644
--- a/tests/layer_tests/common/mo_convert_test_class.py
+++ b/tests/layer_tests/common/mo_convert_test_class.py
@@ -1,10 +1,33 @@
 # Copyright (C) 2018-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import sys
 from pathlib import Path
 
 from common.utils.common_utils import generate_ir
+from common.utils.common_utils import shell
+
 from openvino.test_utils import compare_functions
+from openvino.tools.ovc import ovc
+
+
+def generate_ir_ovc(coverage=False, **kwargs):
+    # Get OVC file directory
+    ovc_path = Path(ovc.__file__).parent
+
+    ovc_runner = ovc_path.joinpath('main.py').as_posix()
+    if coverage:
+        params = [sys.executable, '-m', 'coverage', 'run', '-p', '--source={}'.format(ovc_path),
+                  '--omit=*_test.py', ovc_runner]
+    else:
+        params = [sys.executable, ovc_runner]
+    for key, value in kwargs.items():
+        if key == "input_model":
+            params.append(str(value))
+        else:
+            params.extend(("--{}".format(key), str(value)))
+    exit_code, stdout, stderr = shell(params)
+    return exit_code, stderr
 
 
 class CommonMOConvertTest:
@@ -54,16 +77,26 @@ class CommonMOConvertTest:
         flag, msg = compare_functions(ir_test, ir_ref)
         assert flag, msg
 
-    def _test_by_ref_graph(self, temp_dir, test_params, ref_graph, compare_tensor_names=True, compare_layout=True):
+    def _test_by_ref_graph(self, temp_dir, test_params, ref_graph, compare_tensor_names=True,
+                           compare_layout=True, ovc=False):
         """
         Generates IR using MO Python API, reads it and compares with reference graph.
""" from openvino import Core core = Core() - test_params.update({"model_name": 'model_test', "output_dir": temp_dir}) - self.generate_ir_python_api(**test_params) - ir_test = core.read_model(Path(temp_dir, 'model_test.xml')) + if ovc: + ir_file_name = Path(temp_dir, 'model_test.xml') + test_params.update({"output_model": ir_file_name}) + exit_code, stderr = generate_ir_ovc(coverage=False, **test_params) + assert not exit_code, stderr + else: + test_params.update({"model_name": 'model_test', "output_dir": temp_dir}) + ir_file_name = Path(temp_dir, 'model_test.xml') + self.generate_ir_python_api(**test_params) + + ir_test = core.read_model(ir_file_name) + flag, msg = compare_functions(ir_test, ref_graph, compare_tensor_names=compare_tensor_names) assert flag, msg diff --git a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py index 3fbe25ee130e69..7dc40e310330cf 100644 --- a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py +++ b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py @@ -1,14 +1,17 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os +import tempfile import unittest from typing import Tuple, List import numpy as np -import openvino.runtime as ov import pytest import torch from common.mo_convert_test_class import CommonMOConvertTest + +import openvino.runtime as ov from openvino.runtime import PartialShape, Dimension, Model, Type @@ -1408,3 +1411,167 @@ def test_conversion_params(self, params, ie_device, precision, ir_version, test_params.update({'input_model': fw_model}) self._test_by_ref_graph(temp_dir, test_params, ref_model, compare_tensor_names=False) + + +def pytorch_nn_module_with_enabled_compression(tmp_dir): + import torch + + class NeuralNetwork(torch.nn.Module): + def __init__(self): + super(NeuralNetwork, self).__init__() + self.y = torch.arange(10, dtype=torch.float16) + + def forward(self, x, z): + return (x + self.y.to(torch.float32)) * z + + param_1 = ov.opset13.parameter([10], dtype=np.float32) + param_2 = ov.opset13.parameter([10], dtype=np.float32) + const_1 = ov.opset13.constant(np.arange(10), dtype=np.float16) + convert_1 = ov.opset13.convert(const_1, np.float32) + add_1 = ov.opset13.add(param_1, convert_1) + mul_1 = ov.opset13.multiply(add_1, param_2) + + ov_model_ref = Model([mul_1], [param_1, param_2], "test") + fw_model = NeuralNetwork() + return fw_model, ov_model_ref, {'input': [([10], np.float32), ([10], np.float32)], + 'example_input': (torch.zeros(10), torch.zeros(10))} + + +def pytorch_nn_module_with_disabled_compression(tmp_dir): + import torch + + class NeuralNetwork(torch.nn.Module): + def __init__(self): + super(NeuralNetwork, self).__init__() + self.y = torch.arange(10, dtype=torch.float32) + + def forward(self, x, z): + return (x + self.y) * z + + param_1 = ov.opset13.parameter([-1], dtype=np.float32) + param_2 = ov.opset13.parameter([-1], dtype=np.float32) + const_1 = ov.opset13.constant(np.arange(10), dtype=np.float32) + add_1 = ov.opset13.add(param_1, const_1) + mul_1 = ov.opset13.multiply(add_1, param_2) + + ov_model_ref = Model([mul_1], [param_1, param_2], "test") + fw_model = NeuralNetwork() + return fw_model, ov_model_ref, {'example_input': (torch.zeros(10), torch.zeros(10)), + 'compress_to_fp16': 'False'} + + +class TestConvertModelForPyTorchModelOnDisk(CommonMOConvertTest): + test_data = [ + 'create_pytorch_nn_module_case1', + 'create_pytorch_nn_module_case2', + 'create_pytorch_nn_module_case3', + 
+        'create_pytorch_nn_module_sample_input_int32_two_inputs',
+        'pytorch_nn_module_with_enabled_compression'
+    ]
+
+    @pytest.mark.parametrize('create_model', test_data)
+    @pytest.mark.parametrize('model_format', ['exported_program', 'torch_script'])
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_convert_model_for_pytorch_model_on_disk(self, create_model, model_format,
+                                                     ie_device, precision, ir_version,
+                                                     temp_dir, use_legacy_frontend):
+        fw_model, graph_ref, ovc_params = eval(create_model)(temp_dir)
+
+        with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
+            if model_format == 'torch_script':
+                scripted_model = torch.jit.script(fw_model)
+                scripted_model.save(tmpfile.name)
+                test_params = {'input_model': tmpfile.name}
+                if ovc_params is not None:
+                    test_params.update(ovc_params)
+            else:
+                example_input = ovc_params['example_input']
+                exported_program = torch.export.export(fw_model, example_input)
+                torch.export.save(exported_program, tmpfile.name)
+                test_params = {'input_model': tmpfile.name}
+                if ovc_params is not None:
+                    test_params.update(ovc_params)
+
+            self._test_by_ref_graph(temp_dir, test_params,
+                                    graph_ref, compare_tensor_names=False)
+        os.remove(tmpfile.name)
+
+
+def ovc_case1(tmp_dir):
+    pt_model = make_pt_model_two_inputs()
+    ref_model = make_ref_pt_model_two_inputs([1, 3, 10, 10])
+
+    sample_input1 = torch.zeros(1, 3, 10, 10)
+    sample_input2 = torch.zeros(1, 3, 10, 10)
+    sample_input = sample_input1, sample_input2
+
+    return pt_model, ref_model, {'example_input': sample_input}
+
+
+def pytorch_nn_module_case2(tmp_dir):
+    pt_model = make_pt_model_two_inputs()
+    ref_model = make_ref_pt_model_two_inputs([-1, 3, -1, -1])
+
+    sample_input1 = torch.zeros(1, 3, 10, 10)
+    sample_input2 = torch.zeros(1, 3, 10, 10)
+    sample_input = sample_input1, sample_input2
+
+    return pt_model, ref_model, {'input': '[-1,3,-1,-1],[-1,3,-1,-1]',
+                                 'example_input': sample_input}
+
+
+def nested_dict_input_ovc_case2(tmp_dir):
+    class PTModel(torch.nn.Module):
+        def forward(self, a, b):
+            return a["1"] * a["2"] + b
+
+    net = PTModel()
+    a1 = ov.opset10.parameter(PartialShape([-1]), dtype=np.float32)
+    a2 = ov.opset10.parameter(PartialShape([-1]), dtype=np.float32)
+    b = ov.opset10.parameter(PartialShape([-1]), dtype=np.float32)
+    mul = ov.opset10.multiply(a1, a2)
+    add = ov.opset10.add(mul, b)
+    ref_model = Model([add], [a1, a2, b], "test")
+    example_input = (
+        {
+            "1": torch.tensor([1, 2], dtype=torch.float32),
+            "2": torch.tensor([3, 4], dtype=torch.float32)
+        },
+        torch.tensor([5, 6], dtype=torch.float32)
+    )
+    return net, ref_model, {'example_input': example_input}
+
+
+class TestOVCForExportedProgramOnDisk(CommonMOConvertTest):
+    test_data = [
+        'create_pytorch_nn_module_case1',
+        'pytorch_nn_module_case2',
+        'nested_dict_input_ovc_case2',
+        'pytorch_nn_module_with_disabled_compression'
+    ]
+
+    @pytest.mark.parametrize('create_model', test_data)
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_ovc_for_exported_program_on_disk(self, create_model,
+                                              ie_device, precision, ir_version,
+                                              temp_dir, use_legacy_frontend):
+        fw_model, graph_ref, ovc_params = eval(create_model)(temp_dir)
+        example_input = ovc_params['example_input']
+        del ovc_params['example_input']
+
+        ep_file_name = None
+        with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
+            exported_program = torch.export.export(fw_model, tuple(example_input))
+            torch.export.save(exported_program, tmpfile.name)
+            ep_file_name = tmpfile.name
+
+        test_params = {'input_model': ep_file_name}
+        if ovc_params is not None:
+            test_params.update(ovc_params)
+
+        self._test_by_ref_graph(temp_dir, test_params,
+                                graph_ref, compare_tensor_names=False,
+                                ovc=True)
+        os.remove(ep_file_name)
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_AdjustHue.py b/tests/layer_tests/tensorflow_tests/test_tf_AdjustHue.py
index fe5e18d2b07f94..cdf77a086ed76b 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_AdjustHue.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_AdjustHue.py
@@ -50,8 +50,6 @@ def create_adjust_hue_net(self, input_shape, input_type, special_case):
     def test_adjust_hue_basic(self, input_shape, input_type, special_case, ie_device,
                               precision, ir_version, temp_dir,
                               use_legacy_frontend):
-        if ie_device == 'GPU' and (input_shape == [2, 4, 4, 3] or input_shape == [3, 4, 12, 12, 3]):
-            pytest.skip('150766: Accuracy issue on GPU')
         self._test(*self.create_adjust_hue_net(input_shape, input_type, special_case),
                    ie_device, precision, ir_version, temp_dir=temp_dir,
                    use_legacy_frontend=use_legacy_frontend)
diff --git a/tools/benchmark_tool/setup.py b/tools/benchmark_tool/setup.py
index e78eb7aae37847..0df9a9bc92379a 100644
--- a/tools/benchmark_tool/setup.py
+++ b/tools/benchmark_tool/setup.py
@@ -112,5 +112,5 @@ def read_requirements(path: str) -> List[str]:
     ],
     packages=find_packages(),
    install_requires=read_requirements('requirements.txt'),
-    python_requires='>=3.8',
+    python_requires='>=3.9',
 )
diff --git a/tools/mo/requirements.txt b/tools/mo/requirements.txt
index efc6bc8e1bc778..fea66495f303ee 100644
--- a/tools/mo/requirements.txt
+++ b/tools/mo/requirements.txt
@@ -1,6 +1,5 @@
 -c ../constraints.txt
 numpy>=1.16.6,<2.0.0
-importlib-metadata; python_version < "3.8" and sys_platform == "win32"
 networkx
 defusedxml
 openvino-telemetry
diff --git a/tools/openvino_dev/CMakeLists.txt b/tools/openvino_dev/CMakeLists.txt
index d8488e97d1082e..924c83abc9bff8 100644
--- a/tools/openvino_dev/CMakeLists.txt
+++ b/tools/openvino_dev/CMakeLists.txt
@@ -31,29 +31,6 @@ else()
 endif()
 set(WHEEL_BUILD "${OpenVINO_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
 
-# outbound requirements.txt files for openvino-dev package
-
-ov_cpack_add_component(${OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES} HIDDEN)
-
-set(REQUIREMENTS_IN "${CMAKE_CURRENT_SOURCE_DIR}/requirements_dev.txt.in")
-set(EXTRAS_LIST _ caffe kaldi onnx pytorch tensorflow tensorflow2)
-
-foreach(EXTRAS IN LISTS EXTRAS_LIST)
-    if(EXTRAS STREQUAL "_")
-        set(REQUIREMENTS_OUT "requirements.txt")
-        set(EXTRAS "")
-    else()
-        set(REQUIREMENTS_OUT "requirements_${EXTRAS}.txt")
-        set(EXTRAS "[${EXTRAS}]")
-    endif()
-    configure_file(${REQUIREMENTS_IN} ${REQUIREMENTS_OUT})
-
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${REQUIREMENTS_OUT}
-            DESTINATION ${OV_CPACK_DEVREQDIR}
-            COMPONENT ${OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES}
-            ${OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL})
-endforeach()
-
 # check __init__.py files alignment
 
 function(ov_check_init_files_alignment init_files)
diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py
index 2eb2f2adc133f1..dc0694f0a405b5 100644
--- a/tools/ovc/openvino/tools/ovc/convert_impl.py
+++ b/tools/ovc/openvino/tools/ovc/convert_impl.py
@@ -12,7 +12,6 @@
 from pathlib import Path
 from typing import Iterable, Callable
 
-
 try:
     import openvino_telemetry as tm
     from openvino_telemetry.backend import backend_ga4
@@ -34,8 +33,10 @@
 from openvino.tools.ovc.logger import init_logger
 from openvino.tools.ovc.telemetry_utils import send_params_info, send_conversion_result, \
     init_mo_telemetry
-from openvino.tools.ovc.moc_frontend.pytorch_frontend_utils import get_pytorch_decoder, extract_input_info_from_example
+from openvino.tools.ovc.moc_frontend.pytorch_frontend_utils import get_pytorch_decoder, \
+    extract_input_info_from_example, get_pytorch_decoder_for_model_on_disk
 from openvino.tools.ovc.moc_frontend.paddle_frontend_utils import paddle_frontend_converter
+
 try:
     from openvino.tools.ovc.moc_frontend.jax_frontend_utils import get_jax_decoder
 except:
@@ -232,7 +233,7 @@ def check_model_object(argv):
             paddle.fluid.dygraph.layers.Layer) or isinstance(
         model, paddle.fluid.executor.Executor):
         return "paddle"
-    
+
     if 'jax' in sys.modules:
         import jax
         if isinstance(model, (jax.core.Jaxpr, jax.core.ClosedJaxpr)):
@@ -475,9 +476,9 @@ def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used):
             get_jax_decoder(args['input_model'], args)
         else:
             raise Error("JAX Frontend is not available.")
-    argv = pack_params_to_args_namespace(args, cli_parser, python_api_used)
 
+    argv = pack_params_to_args_namespace(args, cli_parser, python_api_used)
     argv.framework = model_framework
     argv.is_python_object = inp_model_is_object
@@ -491,8 +492,22 @@ def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used):
 
         argv.framework = model_framework
 
+    orig_input_model = argv.input_model
+    pytorch_model_on_disk = False
+    if argv.framework is None and get_pytorch_decoder_for_model_on_disk(argv, args):
+        # try to load a model from disk as TorchScript or ExportedProgram
+        # TorchScriptPythonDecoder or TorchFXPythonDecoder object will be assigned to argv.input_model
+        # saved TorchScript and ExportedModel model can be passed to both ovc tool and Python convert_model
+        pytorch_model_on_disk = True
+
     ov_model = driver(argv, {"conversion_parameters": non_default_params})
 
+    if pytorch_model_on_disk:
+        # release memory allocated for temporal object
+        del argv.input_model
+        # restore original model name in arguments for tool reporting
+        argv.input_model = orig_input_model
+
     if inp_model_is_object and model_framework == "paddle":
         if paddle_runtime_converter:
             paddle_runtime_converter.destroy()
diff --git a/tools/ovc/openvino/tools/ovc/help.py b/tools/ovc/openvino/tools/ovc/help.py
index 1ef914c0f48143..e09102be39419e 100644
--- a/tools/ovc/openvino/tools/ovc/help.py
+++ b/tools/ovc/openvino/tools/ovc/help.py
@@ -7,7 +7,8 @@ def get_convert_model_help_specifics():
     return {
         'input_model':
             {'description':
-                 'Input model file(s) from TensorFlow, ONNX, PaddlePaddle. '
+                 'Input model file(s) from PyTorch (ExportedProgram saved on a disk), '
+                 'TensorFlow, ONNX, PaddlePaddle. '
                  'Use openvino.convert_model in Python to convert models from PyTorch.'
                 ''
                 '',
              'action': CanonicalizePathCheckExistenceAction,
diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py
index dfe25f27d13d7d..0119a541494cb9 100644
--- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py
+++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py
@@ -2,14 +2,23 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import logging as log
+import pathlib
 import sys
 
 import numpy as np
+
 # pylint: disable=no-name-in-module,import-error
 from openvino.runtime import Tensor, PartialShape
-from openvino.tools.ovc.error import Error
 from openvino.tools.ovc.cli_parser import single_input_to_input_cut_info, _InputCutInfo
+from openvino.tools.ovc.error import Error
+
+
+def extract_module_extensions(args):
+    from openvino.frontend.pytorch.module_extension import ModuleExtension
+    extensions = args.get('extension', []) or []
+    if not isinstance(extensions, (list, tuple)):
+        extensions = [extensions]
+    return {extension.module: extension for extension in extensions if isinstance(extension, ModuleExtension)}
 
 
 def get_pytorch_decoder(model, example_inputs, args):
@@ -21,12 +30,6 @@ def get_pytorch_decoder(model, example_inputs, args):
     except Exception as e:
         log.error("PyTorch frontend loading failed")
         raise e
-
-    def extract_module_extensions(args):
-        extensions = args.get('extension', []) or []
-        if not isinstance(extensions, (list, tuple)):
-            extensions = [extensions]
-        return {extension.module: extension for extension in extensions if isinstance(extension, ModuleExtension)}
 
     if 'nncf' in sys.modules:
         is_good_version = True
@@ -54,7 +57,7 @@ def get_pytorch_decoder(model, example_inputs, args):
             model = model.run_decompositions(decomp_table=decomp)
         gm = model.module()
         log.debug(gm.code)
-        decoder = TorchFXPythonDecoder(gm)
+        decoder = TorchFXPythonDecoder(gm, dynamic_shapes=True)
     else:
         decoder = TorchScriptPythonDecoder(
             model,
@@ -69,6 +72,57 @@ def get_pytorch_decoder(model, example_inputs, args):
     return args
 
 
+def get_pytorch_decoder_for_model_on_disk(argv, args):
+    try:
+        from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
+        from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
+        import torch
+    except:
+        return False
+
+    example_inputs = None
+    if 'example_input' in args and args['example_input'] is not None:
+        example_inputs = args['example_input']
+
+    if isinstance(argv.input_model, (tuple, list)) and len(argv.input_model) == 1:
+        input_model = argv.input_model[0]
+    else:
+        input_model = argv.input_model
+
+    if isinstance(input_model, (str, pathlib.Path)):
+        # attempt to load scripted model
+        try:
+            inputs = prepare_torch_inputs(example_inputs)
+            model = torch.jit.load(input_model)
+            model.eval()
+            decoder = TorchScriptPythonDecoder(
+                model,
+                example_input=inputs,
+                shared_memory=args.get("share_weights", True),
+                module_extensions=extract_module_extensions(args))
+            argv.input_model = decoder
+            argv.framework = 'pytorch'
+            return True
+        except:
+            pass
+    if isinstance(input_model, (str, pathlib.Path)):
+        # attempt to load exported model
+        try:
+            exported_program = torch.export.load(input_model)
+            if hasattr(torch, "export") and isinstance(exported_program, (torch.export.ExportedProgram)):
+                from packaging import version
+                if version.parse(torch.__version__) >= version.parse("2.2"):
+                    exported_program = exported_program.run_decompositions()
+                gm = exported_program.module()
+                decoder = TorchFXPythonDecoder(gm, dynamic_shapes=True)
+                argv.input_model = decoder
+                argv.framework = 'pytorch'
+                return True
+        except:
+            pass
+    return False
+
+
 def update_list_or_dict(container, name, idx, value):
     if isinstance(container, dict):
         if name is None: