Skip to content

Commit

Permalink
Fixes as per comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
nshchego committed Jul 4, 2023
1 parent e7bdc4d commit 516f99e
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 14 deletions.
17 changes: 17 additions & 0 deletions src/plugins/intel_cpu/src/nodes/common/cpu_memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include <cstring>
#include "ie_api.h"
#include <ie_parallel.hpp>
#include <onednn/dnnl.h>

namespace ov {
namespace intel_cpu {
Expand Down Expand Up @@ -51,5 +53,20 @@ inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t coun
return 0;
}

/**
 * @brief Copies `count` bytes from `src` to `dst`, using several threads for large buffers.
 *
 * The value returned by dnnl::utils::get_cache_size(2, true) is used as the threshold:
 * copies smaller than it are forwarded to a single cpu_memcpy call, while copies of at
 * least that many bytes are divided between the workers started by parallel_nt, each of
 * which copies the byte range that splitter assigns to it.
 *
 * @param dst   Destination buffer.
 * @param src   Source buffer.
 * @param count Number of bytes to copy.
 *
 * NOTE(review): the arguments (2, true) presumably select the per-core L2 cache size,
 * and 0 passed to parallel_nt presumably requests the default thread count — confirm
 * against the oneDNN / ie_parallel headers.
 */
inline void cpu_parallel_memcpy(void* dst, const void* src, size_t count) {
    const size_t parallel_threshold = dnnl::utils::get_cache_size(2, true);

    // Small copy: hand the whole range to a single cpu_memcpy call.
    if (count < parallel_threshold) {
        cpu_memcpy(dst, src, count);
        return;
    }

    // Byte-typed views so per-thread offsets can be applied with pointer arithmetic.
    const auto* src_bytes = static_cast<const uint8_t*>(src);
    auto* dst_bytes = static_cast<uint8_t*>(dst);

    parallel_nt(0, [&](const size_t ithr, const size_t nthr) {
        size_t first = 0;
        size_t last = 0;
        // splitter fills in the [first, last) sub-range for worker `ithr` out of `nthr`.
        splitter(count, nthr, ithr, first, last);
        cpu_memcpy(dst_bytes + first, src_bytes + first, last - first);
    });
}

} // namespace intel_cpu
} // namespace ov
29 changes: 15 additions & 14 deletions src/plugins/intel_cpu/src/nodes/unique.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "ie_parallel.hpp"
#include <openvino/op/unique.hpp>
#include "common/cpu_memcpy.h"
#include <utils/shape_inference/shape_inference_internal_dyn.hpp>

using namespace InferenceEngine;
Expand Down Expand Up @@ -180,7 +181,7 @@ void Unique::flattenTensorExec() {
uniqueLen = inputLen;

if (sorted) {
std::memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
cpu_parallel_memcpy(uniDataTmpPtr, srcDataPtr, inputLen * sizeof(T));
std::sort(uniDataTmpPtr, uniDataTmpPtr + inputLen);
auto last = std::unique(uniDataTmpPtr, uniDataTmpPtr + inputLen);
uniqueLen = last - uniDataTmpPtr;
Expand Down Expand Up @@ -263,18 +264,18 @@ void Unique::flattenTensorExec() {
redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}});

T* uniDataPtr = reinterpret_cast<T*>(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr());
memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
cpu_parallel_memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T));
if (definedOutputs[FIRST_UNIQUE_IDX]) {
int *firstPtr = reinterpret_cast<int*>(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr());
memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int));
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
auto inToOutPtr = reinterpret_cast<int*>(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr());
memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM]) {
auto occurPtr = reinterpret_cast<int*>(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr());
memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
cpu_parallel_memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int));
}
}

Expand Down Expand Up @@ -383,7 +384,7 @@ void Unique::slicedTensorExec() {
auto first1 = srcDataPtr + uniqIdx[u] * innerLen;
auto first2 = dstDataPtr + u * innerLen;
for (int64_t p = 0lu; p < outerLen; p++) {
memcpy(first2, first1, innerSizeB);
cpu_memcpy(first2, first1, innerSizeB);
first1 += srcOuterStep;
first2 += dstOuterStep;
}
Expand Down Expand Up @@ -425,7 +426,7 @@ void Unique::slicedTensorExec() {
auto src = dst1 + ot * dstOuterStep + colToSort[u].idx * innerLen;
auto dst = dst2 + ot * dstOuterStep + u * innerLen;

memcpy(dst, src, innerSizeB);
cpu_memcpy(dst, src, innerSizeB);
});

if (defined3outputs) {
Expand Down Expand Up @@ -460,26 +461,26 @@ void Unique::slicedTensorExec() {
}

if (definedOutputs[UNIQUE_DATA] && dst1 != dstDataPtr) {
memcpy(dstDataPtr, dst1, dstUniDataLen * sizeof(T));
cpu_parallel_memcpy(dstDataPtr, dst1, dstUniDataLen * sizeof(T));
}
if (definedOutputs[FIRST_UNIQUE_IDX] && first2 != firstPtr) {
memcpy(firstPtr, first2, uniqueLenIB);
cpu_parallel_memcpy(firstPtr, first2, uniqueLenIB);
}
if (definedOutputs[INPUT_TO_UNIQ_IDX] && inToOut2 != inToOutPtr) {
memcpy(inToOutPtr, inToOut2, axisDim * sizeof(int));
cpu_parallel_memcpy(inToOutPtr, inToOut2, axisDim * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM] && occurN2 != occurNPtr) {
memcpy(occurNPtr, occurN2, uniqueLenIB);
cpu_parallel_memcpy(occurNPtr, occurN2, uniqueLenIB);
}
} else {
if (definedOutputs[FIRST_UNIQUE_IDX]) {
memcpy(firstPtr, firstUniTmp.data(), uniqueLenIB);
cpu_parallel_memcpy(firstPtr, firstUniTmp.data(), uniqueLenIB);
}
if (definedOutputs[INPUT_TO_UNIQ_IDX]) {
memcpy(inToOutPtr, inToOutTmp.data(), axisDim * sizeof(int));
cpu_parallel_memcpy(inToOutPtr, inToOutTmp.data(), axisDim * sizeof(int));
}
if (definedOutputs[OCCURRENCES_NUM]) {
memcpy(occurNPtr, occurTmp.data(), uniqueLenIB);
cpu_parallel_memcpy(occurNPtr, occurTmp.data(), uniqueLenIB);
}
}
}

0 comments on commit 516f99e

Please sign in to comment.