Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan authored Aug 16, 2024
2 parents 8af13ec + 0b38dd7 commit 36805ef
Showing 55 changed files with 1,440 additions and 291 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/fedora.yml
@@ -221,6 +221,7 @@ jobs:
if-no-files-found: 'error'

RPM_Packages:
+ name: RPM packages
needs: [Docker, Build]
timeout-minutes: 10
defaults:
@@ -273,6 +274,7 @@ jobs:
python3 -c 'from openvino import Core; Core().get_property("BATCH", "SUPPORTED_PROPERTIES")'
python3 -c 'from openvino.frontend import FrontEndManager; assert len(FrontEndManager().get_available_front_ends()) == 7'
benchmark_app --help
+ opt_in_out --help
ovc --help
Overall_Status:
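
For reference, the Python smoke checks this job runs can be collected into a standalone script. A minimal sketch, assuming the OpenVINO RPM packages built by this workflow are installed on the system:

# Standalone version of the smoke checks run by the RPM_Packages job
# (assumes the OpenVINO RPMs built by this workflow are installed).
from openvino import Core
from openvino.frontend import FrontEndManager

# The BATCH device must report its supported properties.
print(Core().get_property("BATCH", "SUPPORTED_PROPERTIES"))

# All seven frontends are expected to be discoverable.
assert len(FrontEndManager().get_available_front_ends()) == 7
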
8 changes: 1 addition & 7 deletions .github/workflows/job_build_linux.yml
@@ -113,13 +113,7 @@ jobs:
python3 -m pip install -r ${OPENVINO_REPO}/src/bindings/python/wheel/requirements-dev.txt
# For running ONNX frontend unit tests
- if [[ ${{ inputs.os }} == 'ubuntu_24_04' ]]; then
-   # Should be removed after https://github.com/openvinotoolkit/openvino/pull/24242 is merged
-   export CMAKE_GENERATOR="Unix Makefiles"
-   python3 -m pip install --force-reinstall --no-cache-dir -r ${OPENVINO_REPO}/src/frontends/onnx/tests/requirements.txt
- else
-   python3 -m pip install --force-reinstall -r ${OPENVINO_REPO}/src/frontends/onnx/tests/requirements.txt
- fi
+ python3 -m pip install -r ${OPENVINO_REPO}/src/frontends/onnx/tests/requirements.txt
# For running TensorFlow frontend unit tests
python3 -m pip install -r ${OPENVINO_REPO}/src/frontends/tensorflow/tests/requirements.txt
3 changes: 2 additions & 1 deletion .github/workflows/linux_conditional_compilation.yml
@@ -102,7 +102,7 @@ jobs:
BUILD_DIR: /__w/openvino/openvino/openvino_build
SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat
MODELS_PATH: /__w/openvino/openvino/testdata
- SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release
+ SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release_faster_build
if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }}

steps:
@@ -157,6 +157,7 @@
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-DENABLE_PROFILING_ITT=ON \
-DSELECTIVE_BUILD=COLLECT \
+ -DENABLE_FASTER_BUILD=ON \
-DENABLE_DEBUG_CAPS=ON \
-DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \
-DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \
8 changes: 6 additions & 2 deletions cmake/developer_package/packaging/archive.cmake
@@ -7,10 +7,14 @@ include(GNUInstallDirs)
if(APPLE)
# on macOS versions with SIP enabled, we need to use @rpath
# because DYLD_LIBRARY_PATH is ignored
-   set(CMAKE_SKIP_INSTALL_RPATH OFF)
+   set(CMAKE_SKIP_INSTALL_RPATH_DEFAULT OFF)
else()
# we don't need RPATHs, because setupvars.sh is used
-   set(CMAKE_SKIP_INSTALL_RPATH ON)
+   set(CMAKE_SKIP_INSTALL_RPATH_DEFAULT ON)
endif()

+ if(NOT DEFINED CMAKE_SKIP_INSTALL_RPATH)
+   set(CMAKE_SKIP_INSTALL_RPATH ${CMAKE_SKIP_INSTALL_RPATH_DEFAULT})
+ endif()

#
3 changes: 0 additions & 3 deletions cmake/features.cmake
@@ -194,9 +194,6 @@ ov_dependent_option (ENABLE_SYSTEM_PROTOBUF "Enables use of system Protobuf" OFF
ov_dependent_option (ENABLE_SYSTEM_SNAPPY "Enables use of system version of Snappy" OFF
"ENABLE_SNAPPY_COMPRESSION" OFF)

- ov_dependent_option (ENABLE_PYTHON_PACKAGING "Enables packaging of Python API in APT / YUM" OFF
-                      "ENABLE_PYTHON;UNIX" OFF)
-
ov_dependent_option(ENABLE_JS "Enables JS API building" ${ENABLE_JS_DEFAULT} "NOT ANDROID;NOT EMSCRIPTEN" OFF)

ov_option(ENABLE_OPENVINO_DEBUG "Enable output for OPENVINO_DEBUG statements" OFF)
11 changes: 6 additions & 5 deletions cmake/packaging/rpm.cmake
@@ -274,9 +274,6 @@ macro(ov_cpack_settings)
ov_rpm_generate_conflicts("${OV_CPACK_COMP_CORE_DEV}" ${conflicting_versions})

ov_rpm_add_rpmlint_suppression("${OV_CPACK_COMP_CORE_DEV}"
-     # contains samples source codes
-     "devel-file-in-non-devel-package /usr/${OV_CPACK_INCLUDEDIR}/ngraph"
-     "devel-file-in-non-devel-package /usr/${OV_CPACK_INCLUDEDIR}/ie"
"devel-file-in-non-devel-package /usr/${OV_CPACK_INCLUDEDIR}/openvino"
"devel-file-in-non-devel-package /usr/${OV_CPACK_RUNTIMEDIR}/libopenvino*"
"devel-file-in-non-devel-package /usr/${OV_CPACK_RUNTIMEDIR}/pkgconfig/openvino.pc")
@@ -302,8 +299,12 @@ macro(ov_cpack_settings)
ov_rpm_generate_conflicts(${python_component} ${conflicting_versions})

  ov_rpm_add_rpmlint_suppression("${python_component}"
+     # entry points
+     "no-manual-page-for-binary benchmark_app"
+     "no-manual-page-for-binary opt_in_out"
+     "no-manual-page-for-binary ovc"
+     # all directories
      "non-standard-dir-perm /usr/lib64/${pyversion}/site-packages/openvino/*"
+     "non-standard-dir-perm /usr/lib/${pyversion}/site-packages/openvino/*"
  )
endif()

@@ -383,7 +384,7 @@ macro(ov_cpack_settings)
set(CPACK_COMPONENT_OPENVINO_DESCRIPTION "Intel(R) Distribution of OpenVINO(TM) Toolkit Libraries and Development files")
set(CPACK_RPM_OPENVINO_PACKAGE_REQUIRES "${libraries_dev_package}, ${samples_package}")
if(ENABLE_PYTHON_PACKAGING)
-   set(CPACK_DEBIAN_OPENVINO_PACKAGE_DEPENDS "${CPACK_RPM_OPENVINO_PACKAGE_REQUIRES}, ${python_package}, ${python_samples_package}")
+   set(CPACK_RPM_OPENVINO_PACKAGE_REQUIRES "${CPACK_RPM_OPENVINO_PACKAGE_REQUIRES}, ${python_package}, ${python_samples_package}")
endif()
set(CPACK_RPM_OPENVINO_PACKAGE_NAME "openvino-${cpack_name_ver}")
set(CPACK_RPM_OPENVINO_PACKAGE_ARCHITECTURE "noarch")
5 changes: 4 additions & 1 deletion src/bindings/python/CMakeLists.txt
@@ -116,6 +116,9 @@ ov_check_init_files_alignment("${INIT_FILES_RUNTIME}")

ov_option(ENABLE_PYTHON "Enables OpenVINO Python API build" ${ENABLE_PYTHON_DEFAULT})

+ ov_dependent_option (ENABLE_PYTHON_PACKAGING "Enables packaging of Python API in APT / YUM" OFF
+                      "ENABLE_PYTHON;LINUX" OFF)

#
# Check for wheel package
#
@@ -366,7 +369,7 @@ if(ENABLE_PYTHON_PACKAGING)
ov_cpack_add_component(${OV_CPACK_COMP_PYTHON_OPENVINO}_package_${pyversion} HIDDEN)

install(DIRECTORY ${ov_python_package_prefix}/ ${telemetry_python_package_prefix}/
-         DESTINATION ${CMAKE_INSTALL_PREFIX}
+         DESTINATION .
COMPONENT ${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE}_${pyversion}
${OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL}
USE_SOURCE_PERMISSIONS)
14 changes: 14 additions & 0 deletions src/bindings/python/src/pyopenvino/graph/symbol.cpp
@@ -25,6 +25,20 @@ void regclass_graph_Symbol(py::module m) {
},
py::is_operator());

+ symbol.def(
+     "__add__",
+     [](const std::shared_ptr<ov::Symbol>& a, const std::shared_ptr<ov::Symbol>& b) {
+         return a + b;
+     },
+     py::is_operator());
+
+ symbol.def(
+     "__sub__",
+     [](const std::shared_ptr<ov::Symbol>& a, const std::shared_ptr<ov::Symbol>& b) {
+         return a - b;
+     },
+     py::is_operator());
+
symbol.def(
"__bool__",
[](const std::shared_ptr<ov::Symbol>& self) -> bool {
9 changes: 9 additions & 0 deletions src/bindings/python/tests/test_runtime/test_dimension.py
@@ -80,6 +80,15 @@ def test_symbol():
assert dimension.get_symbol() == new_dimension.get_symbol(), "Check: Two symbols are equal: Symbol.__eq__"


+ def test_symbol_operators():
+     symbol_a, symbol_b = Symbol(), Symbol()
+     assert symbol_a + symbol_b == symbol_b + symbol_a
+
+     symbol_c, symbol_d = Symbol(), Symbol()
+     assert symbol_c + symbol_d - symbol_d == symbol_c
+     assert symbol_c + symbol_d - symbol_c == symbol_d


def test_symbol_hash():
symbol = Symbol()
assert isinstance(hash(symbol), int)
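
Taken together, the new operators compose as in the following usage sketch. It assumes Symbol is imported as in this test module and relies only on the behaviors exercised by the tests above (commutativity and add/sub inversion):

# Usage sketch for the new Symbol arithmetic; Symbol is assumed to be
# imported as in this test module.
m, n = Symbol(), Symbol()

flattened = m + n           # symbol representing a flattened (m + n) dimension
assert flattened == n + m   # order of addition does not matter

remainder = flattened - n   # splitting the flattened dimension back out
assert remainder == m       # recovers the original symbol
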
12 changes: 11 additions & 1 deletion src/common/snippets/include/snippets/lowered/pass/init_loops.hpp
@@ -24,7 +24,17 @@ class InitLoops : public Pass {
InitLoops() = default;
bool run(LinearIR& linear_ir) override;

-     static void init_loop_info(const UnifiedLoopInfoPtr& loop_info, size_t loop_id, bool only_runtime_args = false);
+     /**
+      * @brief Updates ptr_increments and finalization offsets of the provided "loop_info" based on the current work amount
+      */
+     static void update_data_pointer_shifts(const UnifiedLoopInfoPtr& loop_info);
+     /**
+      * @brief Updates the work amount and the data pointer shifts of the provided "loop_info"
+      */
+     static void update_runtime_parameters(const UnifiedLoopInfoPtr& loop_info);
+
+ private:
+     static void update_compile_parameters(const UnifiedLoopInfoPtr& loop_info, size_t loop_id);
};

} // namespace pass
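
A schematic Python model of how the two public helpers relate, following the comments above. All names, fields, and the offset formula are illustrative placeholders, not the C++ UnifiedLoopInfo API:

# Schematic model of the InitLoops helpers documented above; everything here
# is an illustrative placeholder for the C++ UnifiedLoopInfo structures.
class LoopInfoModel:
    def __init__(self, work_amount, element_sizes):
        self.work_amount = work_amount
        self.element_sizes = element_sizes
        self.ptr_increments = []
        self.finalization_offsets = []

def update_data_pointer_shifts(loop_info):
    # Pointer increments and finalization offsets are recomputed from the
    # current work amount (the formula below is assumed for illustration).
    loop_info.ptr_increments = list(loop_info.element_sizes)
    loop_info.finalization_offsets = [
        -inc * loop_info.work_amount for inc in loop_info.ptr_increments
    ]

def update_runtime_parameters(loop_info, new_work_amount):
    # The runtime update first refreshes the work amount, then recomputes
    # the dependent data pointer shifts, as the header comments describe.
    loop_info.work_amount = new_work_amount
    update_data_pointer_shifts(loop_info)
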
40 changes: 37 additions & 3 deletions src/common/snippets/include/snippets/pass/split_dimension_m.hpp
@@ -14,6 +14,9 @@ namespace pass {
* @interface SplitDimensionM
* @brief Inserts Reshape nodes before inputs and after outputs of Subgraphs with MatMul inside
* to split dimension M for MatMuls. It allows to increase work amount for parallelism
+  * @attention This pass works only for MHA with static shapes.
+  *            For dynamic shapes, parallel work amount is optimized in RuntimeConfigurator.
+  * @todo Ticket 148805: Move static-case handling into RuntimeConfigurator as well.
* @ingroup snippets
*/
class SplitDimensionM: public CommonOptimizations::SubgraphPass {
@@ -28,17 +31,48 @@ class SplitDimensionM: public CommonOptimizations::SubgraphPass {
// Returns True if parallelism work amount (concurrency) can be increased by this optimization
static bool can_be_optimized(const std::shared_ptr<const ov::Node>& node, size_t concurrency);

+     /**
+      * @brief Tries to split the M dimension in "shape" in accordance with the optimal parallel work amount
+      * @param shape Original shape
+      * @param optimal_parallelism_work_amount Optimal work amount
+      * @param batch_m_dim reference to the batch part of the split M
+      * @param new_m_dim reference to the new M dim after the split
+      * @return true if the split was successful, otherwise false
+      */
+     static bool split(const ov::Shape& shape, size_t optimal_parallelism_work_amount, size_t& batch_m_dim, size_t& new_m_dim);

+     /**
+      * @brief Splits the M dimension in the given order
+      * @param order Original order
+      * @param m_index M dimension index
+      * @return updated order with the split M dimension
+      */
+     static std::vector<size_t> get_updated_order(const std::vector<size_t>& order, size_t m_index);
+     /**
+      * @brief Reshapes the M dimension in "shape": separates M into two parts, "batch_m_dim" and "new_m_dim"
+      * @param shape Shape to split
+      * @param m_index M dimension index
+      * @param batch_m_dim batch part of the split M
+      * @param new_m_dim new M dim after the split
+      * @return the updated shape
+      */
+     static ov::snippets::VectorDims reshape_m_dim(ov::snippets::VectorDims shape, size_t m_index, size_t batch_m_dim, size_t new_m_dim);
+     /**
+      * @brief Unsqueezes the M dimension in "shape" (inserts "1" before the dimension)
+      * @param shape Shape to split
+      * @param m_index M dimension index
+      * @return the updated shape
+      */
+     static ov::snippets::VectorDims unsqueeze_m_dim(ov::snippets::VectorDims shape, size_t m_index);

private:
static std::shared_ptr<ov::op::v0::MatMul> get_matmul(const std::shared_ptr<op::Subgraph>& subgraph);
static std::pair<size_t, size_t> get_splited_dimensions(size_t batch_dim, size_t m_dim, size_t optimal_parallelism_work_amount);
-     static bool split(const ov::Shape& shape, size_t optimal_parallelism_work_amount, size_t& batch_m_dim, size_t& new_m_dim);

void reshape_subgraph(const std::shared_ptr<op::Subgraph>& subgraph, const ov::Shape& shape, size_t batch_m_dim, size_t new_m_dim);

size_t m_concurrency;
};


} // namespace pass
} // namespace snippets
} // namespace ov
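
To make the split contract concrete, here is an illustrative Python sketch of factoring M into batch_m_dim * new_m_dim against a target parallel work amount. The heuristic is assumed for illustration only; it is not the actual SplitDimensionM implementation:

import math

def split_m(shape, optimal_parallelism_work_amount):
    # Factor M (shape[-2]) into (batch_m_dim, new_m_dim) so that the collapsed
    # batch multiplied by batch_m_dim covers the target work amount.
    # Illustrative heuristic only, not the one used by SplitDimensionM.
    batch = math.prod(shape[:-2])
    m = shape[-2]
    target = math.ceil(optimal_parallelism_work_amount / max(batch, 1))
    for batch_m_dim in range(min(target, m), 0, -1):
        if m % batch_m_dim == 0:
            return batch_m_dim, m // batch_m_dim
    return None  # no valid split found

# Example: batch = 2, M = 64, target concurrency = 16 -> split M as 8 x 8.
print(split_m((2, 64, 128), 16))  # (8, 8)
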
86 changes: 82 additions & 4 deletions src/common/snippets/include/snippets/runtime_configurator.hpp
@@ -5,6 +5,7 @@
#pragma once

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_info.hpp"
#include "snippets/kernel_executor_table.hpp"
#include "snippets/lowered/pass/pass.hpp"

@@ -82,8 +83,14 @@ class RuntimeConfigurator {
/**
* @brief Update RuntimeConfig based on LinearIR
* @param linear_ir LinearIR
+  * @todo Ticket 148891: Rewrite on PassPipeline
*/
virtual void update(const lowered::LinearIRCPtr& linear_ir);
+ /**
+  * @brief Update tensor rank based on master shape
+  * @param master_shape Master shape
+  */
+ virtual void update_tensor_rank(const ov::snippets::VectorDims& master_shape);
/**
* @brief Allocate and initialize fields in RuntimeConfig and RuntimeConfigurator
* @param linear_ir LinearIR
@@ -109,11 +116,21 @@
* @param linear_ir LinearIR
*/
virtual void init_tensor_rank(const lowered::LinearIRCPtr& linear_ir) const;

+ struct UnifiedLoopInfoRtParams {
+     size_t work_amount = 0;
+     std::vector<int64_t> ptr_increments;
+     std::vector<int64_t> finalization_offsets;
+ };
+ static UnifiedLoopInfoRtParams compute_runtime_params(const lowered::UnifiedLoopInfoPtr& unified_loop_info);
+ using LoopInfoRuntimeParamsMap = std::unordered_map<lowered::UnifiedLoopInfoPtr, UnifiedLoopInfoRtParams>;
/**
* @brief Update Loop information in LinearIR: Unified and ExpandedLoopInfo
* @param linear_ir LinearIR
+  * @param initializated_info_map Reference to a map [LoopInfo -> RuntimeParams].
+  *        Can be used to pass loop infos into the method that were already initialized, e.g. by parallel domain optimization
*/
void update_loop_info(const lowered::LinearIRCPtr& linear_ir) const;
+ void update_loop_info(const lowered::LinearIRCPtr& linear_ir, LoopInfoRuntimeParamsMap& initializated_info_map) const;
/**
* @brief Update Buffer scratchpad size and offsets if needed
* Note: `update_loop_info` must be called before
@@ -122,12 +139,73 @@
void update_buffer_scratchpad_size(const lowered::LinearIRCPtr& linear_ir) const;
/**
* @brief Calculate data offsets of LinearIR and update these values in RuntimeConfig
+  * @param shapes shapes used in offsets computation
+  * @param layouts layouts used in offsets computation
   */
- void update_data_offsets() const;
+ void update_data_offsets(const std::vector<ov::snippets::VectorDims>& shapes,
+                          const std::vector<std::vector<size_t>>& layouts) const;
  /**
-  * @brief Update latest input shapes
+  * @brief Extract shapes from m_io_descs
   */
- void update_latest_shapes();
+ std::vector<ov::snippets::VectorDims> extract_shapes() const;
+ /**
+  * @brief Extract layouts from m_io_descs
+  */
+ std::vector<std::vector<size_t>> extract_layouts() const;

+ class ParallelWAOptimizer {
+ public:
+     /**
+      * @brief Inits ParallelWAOptimizer: computes optimizer parameters which should be set at compilation stage
+      * @param linear_ir LinearIR
+      * @param io_descs Input/output descriptors which are used for optimizer parameters initialization
+      * @param in_num Number of inputs. It is needed to distinguish input and output shapes/layouts
+      */
+     void init(const ov::snippets::lowered::LinearIRCPtr& linear_ir,
+               const std::vector<snippets::lowered::PortDescriptorPtr>& io_descs,
+               size_t in_num);
+     /**
+      * @brief Checks if the optimizer is enabled
+      * @todo Ticket 148891: when RuntimeConfigurator::update is rewritten on PassPipeline, this method should be removed:
+      *       we will simply not register ParallelWAOptimizer when it is not needed
+      */
+     bool enabled();
+     /**
+      * @brief Checks if the current master shape can be optimized, and if yes, updates all the necessary runtime information
+      * @param master_shape Master shape
+      * @param map Loop info -> Runtime params map which will be passed to "update_loop_info";
+      *        the map is filled with updated loops_to_split loops: "new_m" work amount is set for them, and runtime params are updated correspondingly
+      * @param shapes Vector which is filled with the split shapes
+      * @param layouts Vector which is filled with the split layouts
+      * @param in_num Number of inputs. It is needed to distinguish input and output shapes/layouts
+      * @return status if the optimization is applied
+      */
+     void optimize(ov::snippets::VectorDims& master_shape,
+                   ov::snippets::RuntimeConfigurator::LoopInfoRuntimeParamsMap& map,
+                   std::vector<ov::snippets::VectorDims>& shapes,
+                   std::vector<std::vector<size_t>>& layouts,
+                   size_t in_num);
+
+ private:
+     void update_master_shape(ov::snippets::VectorDims& master_shape, size_t new_batch_dim, size_t new_kernel_dim);
+     void update_split_loops_info(ov::snippets::RuntimeConfigurator::LoopInfoRuntimeParamsMap& map, size_t new_kernel_dim);
+     void update_shapes(std::vector<ov::snippets::VectorDims>& shapes, size_t new_batch_dim, size_t new_kernel_dim);
+     void update_layouts(std::vector<std::vector<size_t>>& layouts);
+
+     static std::unordered_set<snippets::lowered::ExpressionPtr> find_applicable_brgemms(const ov::snippets::lowered::LinearIRCPtr& linear_ir);
+     static std::unordered_set<size_t> find_unsqueezed_params(
+         const ov::snippets::lowered::LinearIRCPtr& linear_ir,
+         const std::unordered_set<snippets::lowered::ExpressionPtr>& brgemms);
+     static std::unordered_set<ov::snippets::lowered::UnifiedLoopInfoPtr> find_loops_to_split(
+         const ov::snippets::lowered::LinearIRCPtr& linear_ir,
+         const std::unordered_set<size_t>& unsqueezed_params);
+
+     std::unordered_set<ov::snippets::lowered::UnifiedLoopInfoPtr> loops_to_split{};
+     std::unordered_set<size_t> unsqueezed_params{};
+     std::vector<std::vector<size_t>> optimized_layouts{};
+     std::vector<size_t> m_dim_idces{};
+     size_t concurrency = 0;
+ } m_optimizer;

std::shared_ptr<RuntimeConfig> m_config = nullptr;

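
The optimize flow described above reduces to a simple decision sequence. A heavily simplified Python sketch follows; the structure and names are illustrative, and split_m refers to the assumed heuristic sketched in the SplitDimensionM section:

import math

def optimize(master_shape, concurrency, split_m):
    # Illustrative flow of ParallelWAOptimizer::optimize: if the parallel work
    # amount implied by the master shape is below the available concurrency,
    # try to split M. The real logic updates LinearIR loop info, shapes and
    # layouts rather than returning the dims.
    parallel_work_amount = math.prod(master_shape[:-1])
    if parallel_work_amount >= concurrency:
        return None  # enough parallelism already; optimization not applied
    result = split_m(master_shape, concurrency)
    if result is None:
        return None  # M cannot be factored; leave runtime info untouched
    new_batch_dim, new_kernel_dim = result
    # Here the real optimizer would call update_split_loops_info /
    # update_shapes / update_layouts / update_master_shape with these values.
    return new_batch_dim, new_kernel_dim
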
13 changes: 13 additions & 0 deletions src/common/snippets/include/snippets/utils/utils.hpp
@@ -275,6 +275,19 @@ std::shared_ptr<ov::Node> get_leaf_node_of_first_parent_shape_infer_seq(const st

int64_t get_dim_stride(const lowered::ExpressionPort& expr_port, size_t idx = 1);

+ /**
+  * @brief Traverses the path starting from "expr" and calls "func" for each expression.
+  *        Traversal direction is defined by "visit_parent_path"
+  * @param expr The expression from which the path is started.
+  * @param visited Set of expressions which were already visited.
+  * @param func The function which is called for each visited expression.
+  * @param visit_parent_path if true, parent nodes are visited; otherwise, consumers are visited.
+  */
+ void visit_path(const lowered::ExpressionPtr& expr,
+                 std::unordered_set<lowered::ExpressionPtr>& visited,
+                 std::function<void(lowered::ExpressionPtr)> func,
+                 bool visit_parent_path);

} // namespace utils
} // namespace snippets
} // namespace ov
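
A minimal Python analogue of the traversal contract; the parents/consumers attributes are placeholders for the real LinearIR expression API:

def visit_path(expr, visited, func, visit_parent_path):
    # Iterative walk mirroring the documented contract: call func once per
    # not-yet-visited expression, moving toward parents or consumers.
    # "parents"/"consumers" are placeholder attributes, not the real API.
    stack = [expr]
    while stack:
        current = stack.pop()
        if current in visited:
            continue
        visited.add(current)
        func(current)
        stack.extend(current.parents if visit_parent_path else current.consumers)
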
