Skip to content

Commit

Permalink
[CORE][CACHE_DIR] xsputn parallel optimization (openvinotoolkit#25847)
Browse files Browse the repository at this point in the history
### Details:
- *Multithreaded implementation of the function `OstreamHashWrapper::xsputn()`*

### Tickets:
 - *127331*
  • Loading branch information
nshchego authored Aug 7, 2024
1 parent bc5e902 commit 6dce275
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
2 changes: 2 additions & 0 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ ov_build_target_faster(openvino_core_obj

# Project helper applied to src/version.cpp for the openvino_core_obj target
# (presumably injects version macros into that source file; confirm in the helper's definition).
ov_add_version_defines(src/version.cpp openvino_core_obj)

# Project helper applied to openvino_core_obj.
# NOTE(review): presumably configures the selected threading backend so that
# openvino/core/parallel.hpp can be used from core sources; verify against the macro.
ov_set_threading_interface_for(openvino_core_obj)

# Link the object target privately against the listed OpenVINO targets.
target_link_libraries(openvino_core_obj PRIVATE openvino::reference openvino::util
openvino::pugixml openvino::shape_inference openvino::core::dev)

Expand Down
22 changes: 12 additions & 10 deletions src/core/src/pass/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "openvino/core/except.hpp"
#include "openvino/core/meta_data.hpp"
#include "openvino/core/model.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/core/type/float16.hpp"
#include "openvino/op/util/framework_node.hpp"
#include "openvino/opsets/opset1.hpp"
Expand Down Expand Up @@ -1385,26 +1386,27 @@ static uint64_t hash_combine(uint64_t seed, const T& a) {
}

// Stream buffer that does not store what is written to it: xsputn() folds the
// written bytes into a running 64-bit sum (m_res), which getResult() returns.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped, so
// removed and added lines of the commit appear back to back (two m_res
// declarations, two xsputn bodies, two `rest` declarations). It is not compilable
// as shown; the old/new labels below are inferred from the commit summary
// (12 additions, 10 deletions) and should be confirmed against the repository.
class OstreamHashWrapper final : public std::streambuf {
// Presumably the removed (pre-commit) declaration.
uint64_t m_res = 0;
// Presumably the added (post-commit) declaration; same initial value with an explicit suffix.
uint64_t m_res = 0lu;

public:
// Returns the sum accumulated by all xsputn() calls so far.
uint64_t getResult() const {
return m_res;
}

// Adds the n bytes starting at s to m_res and returns n.
// The sum does not depend on the order of the words within one call.
std::streamsize xsputn(const char* s, std::streamsize n) override {
// --- Presumably the removed implementation: serial sum over streamsize-sized words. ---
auto* intS = (const std::streamsize*)s;
std::streamsize n64 = n / static_cast<std::streamsize>(sizeof(std::streamsize));
std::streamsize i = 0;
// Using 64-bit values executes much faster than char
while (i++ < n64) {
m_res += *(intS++);
}
// --- Presumably the added implementation: sum of 32-bit words via parallel_sum. ---
// Reinterpret data as uint32_t and accumulate in uint64_t to avoid overflow fluctuations in parallel_sum.
// NOTE(review): s is a char buffer with no alignment guarantee visible here; reading it
// through const uint32_t* assumes unaligned 4-byte loads are acceptable on every target. Confirm.
auto* int_sum = reinterpret_cast<const uint32_t*>(s);
// Number of whole 32-bit words contained in the n bytes.
const uint64_t n32 = n / sizeof(uint32_t);

// parallel_sum comes from openvino/core/parallel.hpp; presumably it sums the lambda's
// result for k in [0, n32), starting from the uint64_t(0) seed. Verify against that header.
m_res += parallel_sum(n32, uint64_t(0lu), [&](size_t k) -> uint32_t {
return int_sum[k];
});

// Presumably removed: tail length and loop header of the old word-size variant.
std::streamsize rest = n % static_cast<std::streamsize>(sizeof(std::streamsize));
for (i = 0; i < rest; i++) {
// Presumably added: the tail is the 0..3 bytes left after the last whole 32-bit word.
const uint64_t rest = n % sizeof(uint32_t);
for (uint64_t i = 0lu; i < rest; i++) {
// Tail bytes are added one at a time as plain char.
// NOTE(review): the value added for bytes >= 0x80 depends on whether char is signed
// on the target, so the result may differ between platforms. Verify.
m_res += s[n - rest + i];
}

// All n bytes are reported as consumed.
return n;
}
};
Expand Down

0 comments on commit 6dce275

Please sign in to comment.