Skip to content

Commit

Permalink
Enabled reference ops (openvinotoolkit#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
alvoron authored and dmitry-gorokhov committed Apr 5, 2023
1 parent 3e435fe commit 4ca815c
Show file tree
Hide file tree
Showing 19 changed files with 137 additions and 65 deletions.
12 changes: 8 additions & 4 deletions src/plugins/intel_cpu/src/nodes/bin_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {

#if defined(OPENVINO_ARCH_X86_64)
#define GET_OFF(field) offsetof(jit_bin_conv_call_args, field)

template <cpu_isa_t isa>
Expand Down Expand Up @@ -874,7 +874,7 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_
}
}
};

#endif
bool BinaryConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
Expand Down Expand Up @@ -913,7 +913,7 @@ BinaryConvolution::BinaryConvolution(const std::shared_ptr<ngraph::Node>& op, co
}
paddingL = binConv->get_pads_begin();
paddingR = binConv->get_pads_end();

#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(x64::avx512_core)) {
implType = impl_desc_type::jit_avx512;
} else if (mayiuse(x64::avx2)) {
Expand All @@ -923,6 +923,9 @@ BinaryConvolution::BinaryConvolution(const std::shared_ptr<ngraph::Node>& op, co
} else {
implType = impl_desc_type::ref;
}
#else
implType = impl_desc_type::ref;
#endif
} else {
IE_THROW(NotImplemented) << errorMessage;
}
Expand Down Expand Up @@ -1092,7 +1095,7 @@ void BinaryConvolution::createPrimitive() {
IMPLICATION(jcp.kw > 7, (jcp.t_pad == 0 && jcp.l_pad == 0) || (jcp.stride_w == 1 && jcp.stride_h == 1));
if (!args_ok)
IE_THROW() << "BinaryConvolution with name '" << getName() << "' has unsupported parameters";

#if defined(OPENVINO_ARCH_X86_64)
if (implType == impl_desc_type::jit_avx512) {
bin_conv_kernel.reset(new jit_uni_bin_conv_kernel_f32<x64::avx512_core>(jcp, jcp_dw_conv, *attr.get()));
} else if (implType == impl_desc_type::jit_avx2) {
Expand All @@ -1102,6 +1105,7 @@ void BinaryConvolution::createPrimitive() {
}
if (bin_conv_kernel)
bin_conv_kernel->create_ker();
#endif
}

bool BinaryConvolution::canFuse(const NodePtr& node) const {
Expand Down
18 changes: 14 additions & 4 deletions src/plugins/intel_cpu/src/nodes/def_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {

#if defined(OPENVINO_ARCH_X86_64)
#define GET_OFF(field) offsetof(jit_def_conv_call_args, field)

template <cpu_isa_t isa>
Expand Down Expand Up @@ -671,7 +671,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_
pop(reg_sampled_offs);
}
};

#endif
bool DeformableConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (!one_of(op->get_type_info(),
Expand Down Expand Up @@ -824,6 +824,7 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
config.outConfs[0].inPlace(-1);

impl_desc_type impl_type;
#if defined(OPENVINO_ARCH_X86_64)
const int simd_w = mayiuse(cpu::x64::avx512_core) ? 16 : 8;

auto &weiDims = getInputShapeAtPort(WEI_ID).getDims();
Expand All @@ -846,7 +847,10 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
} else {
impl_type = impl_desc_type::ref;
}

#else
impl_type = impl_desc_type::ref;
#endif
#if defined(OPENVINO_ARCH_X86_64)
if (!enforceRef && mayiuse(cpu::x64::sse41)) {
// optimized implementation
auto dataFormat = memory::format_tag::nhwc;
Expand All @@ -868,6 +872,7 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
memory::data_type::f32, dataFormat));
supportedPrimitiveDescriptors.push_back({config, impl_type});
} else {
#endif
// reference implementation
config.inConfs[DATA_ID].setMemDesc(std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(DATA_ID), memory::data_type::f32,
memory::format_tag::nchw));
Expand All @@ -882,7 +887,9 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
config.outConfs[0].setMemDesc(std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(DATA_ID), memory::data_type::f32,
memory::format_tag::nchw));
supportedPrimitiveDescriptors.push_back({config, impl_type});
#if defined(OPENVINO_ARCH_X86_64)
}
#endif
}

void DeformableConvolution::DefConvExecutor::prepareSamplingWeights(
Expand Down Expand Up @@ -1033,7 +1040,7 @@ DeformableConvolution::DefConvExecutor::DefConvExecutor(const DefConvAttr &defCo
if (withModulation) {
modStrides = descVector[MOD_ID]->getStrides();
}

#if defined(OPENVINO_ARCH_X86_64)
const VectorDims srcDims = descVector[DATA_ID]->getShape().getStaticDims();
const VectorDims weiDims = descVector[WEI_ID]->getShape().getStaticDims();
const VectorDims dstDims = descVector[descVector.size() - 1]->getShape().getStaticDims();
Expand Down Expand Up @@ -1084,11 +1091,13 @@ DeformableConvolution::DefConvExecutor::DefConvExecutor(const DefConvAttr &defCo
jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4;

jcp.nthr = dnnl_get_max_threads();
#endif
}

DeformableConvolution::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr &defConvAttr,
const std::vector<std::shared_ptr<BlockedMemoryDesc>> &descVector) :
DefConvExecutor(defConvAttr, descVector) {
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu::x64::avx512_core)) {
def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_core>(jcp));
} else if (mayiuse(cpu::x64::avx2)) {
Expand All @@ -1103,6 +1112,7 @@ DeformableConvolution::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr
} else {
IE_THROW() << "Can't compile DefConvJitExecutor";
}
#endif
}

void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const float* offsets,
Expand Down
7 changes: 4 additions & 3 deletions src/plugins/intel_cpu/src/nodes/dft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,10 +535,11 @@ void DFT::prepareParams() {
hasFFT = true;
}
}

#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu::x64::sse41)) {
createJITKernels(hasDFT, hasFFT);
}
#endif
}

std::vector<int32_t> DFT::getAxes() const {
Expand All @@ -553,7 +554,7 @@ std::vector<int32_t> DFT::getAxes() const {
std::sort(axes.begin(), axes.end());
return axes;
}

#if defined(OPENVINO_ARCH_X86_64)
void DFT::createJITKernels(bool hasDFT, bool hasFFT) {
if (hasDFT && dftKernel == nullptr) {
if (mayiuse(cpu::x64::avx512_core)) {
Expand Down Expand Up @@ -585,7 +586,7 @@ void DFT::createJITKernels(bool hasDFT, bool hasFFT) {
fftKernel->create_ker();
}
}

#endif
} // namespace node
} // namespace intel_cpu
} // namespace ov
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/nodes/dft.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ class DFT : public Node {

private:
std::vector<int32_t> getAxes() const;
#if defined(OPENVINO_ARCH_X86_64)
void createJITKernels(bool hasDFT, bool hasFFT);

#endif
void dftNd(float* output,
const VectorDims& outputShape,
const VectorDims& outputStrides,
Expand Down
25 changes: 18 additions & 7 deletions src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {

#if defined(OPENVINO_ARCH_X86_64)
#define GET_OFF(field) offsetof(jit_extract_image_patches_args, field)

template <cpu_isa_t isa>
Expand Down Expand Up @@ -270,7 +270,7 @@ struct jit_extract_image_patches_kernel : public jit_uni_extract_image_patches_k
dd(i * jpp.SW * jpp.dtype_size);
}
};

#endif
bool ExtractImagePatches::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
auto extImgPatcher = ngraph::as_type_ptr<const ngraph::opset3::ExtractImagePatches>(op);
Expand Down Expand Up @@ -378,8 +378,13 @@ void ExtractImagePatches::prepareParams() {
const auto& out_dims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
const auto prcSize = getOriginalInputPrecisionAtPort(0).size();
ExtractImagePatchesKey key = {in_dims, out_dims, _ksizes, _strides, _rates, _auto_pad, prcSize};
#if defined(OPENVINO_ARCH_X86_64)
const auto isJit = mayiuse(x64::sse41);
#else
const auto isJit = false;
#endif
auto buildExecutor = [&isJit](const ExtractImagePatchesKey& key) -> executorPtr {
#if defined(OPENVINO_ARCH_X86_64)
if (isJit) {
return std::make_shared<ExtractImagePatchesJitExecutor>(key.inDims,
key.outDims,
Expand All @@ -389,14 +394,18 @@ void ExtractImagePatches::prepareParams() {
key.padType,
key.prcSize);
} else {
#else
return std::make_shared<ExtractImagePatchesRefExecutor>(key.inDims,
key.outDims,
key.kSizes,
key.strides,
key.rates,
key.padType,
key.prcSize);
#endif
#if defined(OPENVINO_ARCH_X86_64)
}
#endif
};
auto cache = context->getParamsCache();
auto result = cache->getOrCreate(key, buildExecutor);
Expand Down Expand Up @@ -478,7 +487,7 @@ void ExtractImagePatches::ExtractImagePatchesRefExecutor::executeReference(
memset(my_dst_ptr, 0, num_bytes_to_set);
});
}

#if defined(OPENVINO_ARCH_X86_64)
void ExtractImagePatches::ExtractImagePatchesJitExecutor::executeOptimizedGeneric(
void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const {
const char* src_data = reinterpret_cast<const char*>(src);
Expand Down Expand Up @@ -508,7 +517,7 @@ void ExtractImagePatches::ExtractImagePatchesJitExecutor::executeOptimizedGeneri
(*pKernel)(&args);
});
}

#endif
jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecutor::fillJpp(
const VectorDims& inDims,
const VectorDims& outDims,
Expand Down Expand Up @@ -564,6 +573,7 @@ jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecuto
}

jpp.dtype_size = prcSize;
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(x64::avx512_core)) {
jpp.block_size = cpu_isa_traits<x64::avx512_core>::vlen / prcSize;
} else if (mayiuse(x64::avx2)) {
Expand All @@ -573,10 +583,11 @@ jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecuto
} else {
jpp.block_size = 1;
}

#endif
jpp.block_size = 1;
return jpp;
}

#if defined(OPENVINO_ARCH_X86_64)
ExtractImagePatches::ExtractImagePatchesJitExecutor::ExtractImagePatchesJitExecutor(
const VectorDims& inDims,
const VectorDims& outDims,
Expand Down Expand Up @@ -606,7 +617,7 @@ void ExtractImagePatches::ExtractImagePatchesJitExecutor::exec(
IE_THROW() << "Can't execute, kernel for extract image patches node is not compiled";
executeOptimizedGeneric(src, dst, istrides, ostrides);
}

#endif
ExtractImagePatches::ExtractImagePatchesRefExecutor::ExtractImagePatchesRefExecutor(
const VectorDims& inDims,
const VectorDims& outDims,
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/nodes/extract_image_patches.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class ExtractImagePatches : public Node {

using executorPtr = std::shared_ptr<ExtractImagePatchesExecutor>;
executorPtr execPtr = nullptr;

#if defined(OPENVINO_ARCH_X86_64)
struct ExtractImagePatchesJitExecutor : public ExtractImagePatchesExecutor {
ExtractImagePatchesJitExecutor(
const VectorDims& inDims,
Expand All @@ -111,7 +111,7 @@ class ExtractImagePatches : public Node {
private:
std::unique_ptr<jit_uni_extract_image_patches_kernel> pKernel;
};

#endif
struct ExtractImagePatchesRefExecutor : public ExtractImagePatchesExecutor {
ExtractImagePatchesRefExecutor(
const VectorDims& inDims,
Expand Down
23 changes: 16 additions & 7 deletions src/plugins/intel_cpu/src/nodes/fake_quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ using namespace Xbyak;
namespace ov {
namespace intel_cpu {
namespace node {

#if defined(OPENVINO_ARCH_X86_64)
#define GET_OFF(field) offsetof(jit_quantize_call_args, field)

template <cpu_isa_t isa>
Expand Down Expand Up @@ -863,7 +863,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_
}
}
};

#endif
bool FakeQuantize::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto fq = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(op);
Expand Down Expand Up @@ -1316,6 +1316,7 @@ void FakeQuantize::initSupportedPrimitiveDescriptors() {
return;

impl_desc_type impl_type;
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu::x64::avx512_core)) {
impl_type = impl_desc_type::jit_avx512;
} else if (mayiuse(cpu::x64::avx2)) {
Expand All @@ -1325,8 +1326,14 @@ void FakeQuantize::initSupportedPrimitiveDescriptors() {
} else {
impl_type = impl_desc_type::ref;
}

#else
impl_type = impl_desc_type::ref;
#endif
#if defined(OPENVINO_ARCH_X86_64)
if (!mayiuse(cpu::x64::sse41) || getAxis() != 1) {
#else
if (getAxis() != 1) {
#endif
impl_type = impl_desc_type::ref;

if (!isBinarization()) {
Expand Down Expand Up @@ -1445,6 +1452,7 @@ void FakeQuantize::createPrimitive() {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
#if defined(OPENVINO_ARCH_X86_64)
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
const auto& config = getSelectedPrimitiveDescriptor()->getConfig();

Expand Down Expand Up @@ -1485,6 +1493,7 @@ void FakeQuantize::createPrimitive() {
auto result = cache->getOrCreate(key, buildExecutor);
execPtr = result.first;
}
#endif
}

void FakeQuantize::executeReference() {
Expand Down Expand Up @@ -1596,7 +1605,7 @@ void FakeQuantize::executeReference() {
});
}
}

#if defined(OPENVINO_ARCH_X86_64)
void FakeQuantize::executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
const auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
Expand Down Expand Up @@ -1761,7 +1770,7 @@ void FakeQuantize::executeQuantization(const std::unique_ptr<jit_uni_quantize_ke
});
}
}

#endif
// Forwards the call to execute() with the same stream; this function adds
// no logic of its own beyond that delegation.
void FakeQuantize::executeDynamicImpl(dnnl::stream strm) {
execute(strm);
}
Expand Down Expand Up @@ -2108,7 +2117,7 @@ bool FakeQuantize::appendAttrPostOps(DnnlPostOpsComposer& dnnlpoc,
dnnlpoc.appendLinear(f.osc, f.osh, isLastPostOp, allowBinary);
return true;
}

#if defined(OPENVINO_ARCH_X86_64)
FakeQuantize::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) {
bool isBinarization = _jqp.op_type == Algorithm::FQBinarization;
if (mayiuse(cpu::x64::avx512_core)) {
Expand Down Expand Up @@ -2144,7 +2153,7 @@ void FakeQuantize::FakeQuantizeJitExecutor::exec(const FakeQuantize& node) {
node.executeQuantization(pKernel);
}
}

#endif
// Returns true when the type reported by getType() is Type::FakeQuantize,
// false otherwise. Does not modify the node (const member).
bool FakeQuantize::created() const {
return getType() == Type::FakeQuantize;
}
Expand Down
Loading

0 comments on commit 4ca815c

Please sign in to comment.