Added Reduce executors (openvinotoolkit#98)
alvoron authored Mar 3, 2023
1 parent 2deed7a commit b1daeeb
Showing 19 changed files with 849 additions and 33 deletions.
20 changes: 1 addition & 19 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_matmul.cpp
@@ -2,32 +2,14 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include "acl_utils.hpp"
 #include "acl_matmul.hpp"
 
 namespace ov {
 namespace intel_cpu {
 
 using namespace arm_compute;
 
-TensorShape shapeCast(const VectorDims& dims) {
-    arm_compute::TensorShape tensorShape;
-    for (std::size_t i = 0; i < dims.size(); ++i) {
-        tensorShape.set(dims.size() - i - 1, dims[i], false);
-    }
-    if (tensorShape.num_dimensions() == 0) {
-        tensorShape.set(0, 1, false);
-        tensorShape.set_num_dimensions(1);
-    }
-    return tensorShape;
-}
-
-inline Dim vectorProduct(const VectorDims& vec, size_t size) {
-    Dim prod = 1;
-    for (size_t i = 0; i < size; ++i)
-        prod *= vec[i];
-    return prod;
-}
-
 AclMatMulExecutor::AclMatMulExecutor(const ExecutorContext::CPtr context) : MatMulExecutor(context) {}
 
 bool AclMatMulExecutor::init(const MatMulAttrs& matmulAttrs,
2 changes: 0 additions & 2 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_matmul.hpp
@@ -10,8 +10,6 @@
 namespace ov {
 namespace intel_cpu {
 
-arm_compute::TensorShape shapeCast(const VectorDims& dims);
-
 class AclMatMulExecutor : public MatMulExecutor {
 public:
     AclMatMulExecutor(const ExecutorContext::CPtr context);
101 changes: 101 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp
@@ -0,0 +1,101 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_utils.hpp"
#include "acl_reduce.hpp"

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

arm_compute::ReductionOperation getAclReductionOperationByAlgorithm(Algorithm algorithm) {
    switch (algorithm) {
        case Algorithm::ReduceMax:  return arm_compute::ReductionOperation::MAX;
        case Algorithm::ReduceMin:  return arm_compute::ReductionOperation::MIN;
        case Algorithm::ReduceSum:  return arm_compute::ReductionOperation::SUM;
        case Algorithm::ReduceProd: return arm_compute::ReductionOperation::PROD;
        default: IE_THROW() << "Unsupported reduction operation: " << static_cast<int>(algorithm);
    }
}

AclReduceExecutor::AclReduceExecutor(const ExecutorContext::CPtr context) : ReduceExecutor(context) {}

bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs,
                             const std::vector<MemoryDescPtr>& srcDescs,
                             const std::vector<MemoryDescPtr>& dstDescs,
                             const dnnl::primitive_attr& attr) {
    if (reduceAttrs.operation != Algorithm::ReduceMax &&
        reduceAttrs.operation != Algorithm::ReduceMin &&
        reduceAttrs.operation != Algorithm::ReduceSum &&
        reduceAttrs.operation != Algorithm::ReduceProd &&
        reduceAttrs.operation != Algorithm::ReduceMean) {
        return false;
    }

    this->reduceAttrs = reduceAttrs;

    auto srcDims = srcDescs[0]->getShape().getStaticDims();
    auto dstDims = dstDescs[0]->getShape().getStaticDims();

    TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
        precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
    TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
        precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));

    srcTensor.allocator()->init(srcTensorInfo);
    dstTensor.allocator()->init(dstTensorInfo);

    switch (reduceAttrs.operation) {
        case Algorithm::ReduceMean:
            // NEReduceMean accepts a list of reduction axes.
            for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) {
                auto pos = axisCast(i, reduceAttrs.axes.size());
                axesMean.set(pos, reduceAttrs.axes[i]);
            }
            if (!arm_compute::NEReduceMean::validate(&srcTensorInfo, axesMean, reduceAttrs.keepDims, &dstTensorInfo)) {
                return false;
            }
            exec_func = [this] {
                auto acl_op = std::make_unique<arm_compute::NEReduceMean>();
                acl_op->configure(&srcTensor, axesMean, this->reduceAttrs.keepDims, &dstTensor);
                acl_op->run();
            };
            break;
        case Algorithm::ReduceMax:
        case Algorithm::ReduceMin:
        case Algorithm::ReduceSum:
        case Algorithm::ReduceProd:
            // NEReductionOperation supports a single reduction axis only.
            if (reduceAttrs.axes.size() != 1) {
                return false;
            }
            if (!arm_compute::NEReductionOperation::validate(&srcTensorInfo, &dstTensorInfo, axisCast(reduceAttrs.axes[0], srcDims.size()),
                                                             getAclReductionOperationByAlgorithm(reduceAttrs.operation), reduceAttrs.keepDims)) {
                return false;
            }
            exec_func = [this, srcDims] {
                auto acl_op = std::make_unique<arm_compute::NEReductionOperation>();
                acl_op->configure(&srcTensor, &dstTensor, axisCast(this->reduceAttrs.axes[0], srcDims.size()),
                                  getAclReductionOperationByAlgorithm(this->reduceAttrs.operation), this->reduceAttrs.keepDims);
                acl_op->run();
            };
            break;
        default:
            IE_THROW() << "Unsupported operation type for ACL Reduce executor: " << static_cast<int>(reduceAttrs.operation);
    }

    return true;
}

void AclReduceExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, std::unordered_map<int, MemoryPtr> postOpsArgs) {
    // import_memory() wraps the caller's buffers without taking ownership;
    // free() below releases only the import, not the underlying memory.
    srcTensor.allocator()->import_memory(src[0]->GetPtr());
    dstTensor.allocator()->import_memory(dst[0]->GetPtr());

    exec_func();

    srcTensor.allocator()->free();
    dstTensor.allocator()->free();
}

} // namespace intel_cpu
} // namespace ov
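For orientation, here is a minimal caller-side sketch of the new executor. The names `context`, `srcDesc`/`dstDesc`, and `srcMem`/`dstMem` are placeholders, not part of the commit; in the plugin this wiring is done by the Reduce node through the executor factory rather than by hand.

```cpp
// Hypothetical usage sketch: build a ReduceSum executor over one axis and
// run it on preallocated buffers.
ReduceAttrs attrs;
attrs.operation = Algorithm::ReduceSum;
attrs.axes     = {3};    // one axis, as NEReductionOperation requires
attrs.keepDims = true;

AclReduceExecutor reduce(context);  // 'context' supplied by the plugin
if (reduce.init(attrs, {srcDesc}, {dstDesc}, dnnl::primitive_attr{})) {
    // exec() imports the raw pointers into ACL tensors, runs the lambda
    // configured in init(), and releases the non-owning imports afterwards.
    reduce.exec({srcMem}, {dstMem}, {});
}
```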
60 changes: 60 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp
@@ -0,0 +1,60 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

// TODO: remove relative path
#include "../reduce.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"

namespace ov {
namespace intel_cpu {

class AclReduceExecutor : public ReduceExecutor {
public:
    AclReduceExecutor(const ExecutorContext::CPtr context);

    bool init(const ReduceAttrs& reduceAttrs,
              const std::vector<MemoryDescPtr>& srcDescs,
              const std::vector<MemoryDescPtr>& dstDescs,
              const dnnl::primitive_attr& attr) override;
    void exec(const std::vector<MemoryCPtr>& src,
              const std::vector<MemoryPtr>& dst,
              std::unordered_map<int, MemoryPtr> postOpsArgs) override;

    impl_desc_type getImplType() const override {
        return implType;
    }

private:
    std::function<void()> exec_func;
    ReduceAttrs reduceAttrs;
    impl_desc_type implType = impl_desc_type::acl;

    arm_compute::Coordinates axesMean;
    arm_compute::Tensor srcTensor;
    arm_compute::Tensor dstTensor;
};

class AclReduceExecutorBuilder : public ReduceExecutorBuilder {
public:
    bool isSupported(const ReduceAttrs& reduceAttrs,
                     const std::vector<MemoryDescPtr>& srcDescs,
                     const std::vector<MemoryDescPtr>& dstDescs) const override {
        // Source and destination precisions must match and be one of the
        // precisions ACL reductions handle: FP32, FP16, or I32.
        if (srcDescs[0]->getPrecision() != dstDescs[0]->getPrecision() ||
            (srcDescs[0]->getPrecision() != InferenceEngine::Precision::FP32 &&
             srcDescs[0]->getPrecision() != InferenceEngine::Precision::FP16 &&
             srcDescs[0]->getPrecision() != InferenceEngine::Precision::I32))
            return false;

        return true;
    }

    ReduceExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
        return std::make_shared<AclReduceExecutor>(context);
    }
};

} // namespace intel_cpu
} // namespace ov
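A builder like this is typically consulted when the plugin picks an executor for a node. The selection loop below is an illustrative sketch of that pattern, not the factory code from this commit:

```cpp
// Illustrative executor selection: probe each registered builder and return
// the first executor whose isSupported() and init() both succeed.
std::vector<std::shared_ptr<ReduceExecutorBuilder>> builders = {
    std::make_shared<AclReduceExecutorBuilder>()
};
for (const auto& builder : builders) {
    if (!builder->isSupported(reduceAttrs, srcDescs, dstDescs))
        continue;
    ReduceExecutorPtr executor = builder->makeExecutor(context);
    if (executor->init(reduceAttrs, srcDescs, dstDescs, attr))
        return executor;    // otherwise fall through to the next builder
}
```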
36 changes: 34 additions & 2 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp
@@ -3,9 +3,41 @@
 //
 #pragma once
 
+#include "ie_precision.hpp"
+#include "memory_desc/cpu_memory_desc.h"
+#include "arm_compute/core/Types.h"
+
 namespace ov {
 namespace intel_cpu {
 
+/**
+ * @brief Return ComputeLibrary TensorShape with the reversed (innermost-first) dimension order used in ACL
+ * @param dims vector of dimensions to convert
+ * @return ComputeLibrary TensorShape object
+ */
+inline arm_compute::TensorShape shapeCast(const VectorDims& dims) {
+    arm_compute::TensorShape tensorShape;
+    for (std::size_t i = 0; i < dims.size(); ++i) {
+        tensorShape.set(dims.size() - i - 1, dims[i], false);
+    }
+    if (tensorShape.num_dimensions() == 0) {
+        tensorShape.set(0, 1, false);
+        tensorShape.set_num_dimensions(1);
+    }
+    return tensorShape;
+}
+
+inline std::size_t axisCast(const std::size_t axis, const std::size_t shapeSize) {
+    return shapeSize - axis - 1;
+}
+
+inline Dim vectorProduct(const VectorDims& vec, size_t size) {
+    Dim prod = 1;
+    for (size_t i = 0; i < size; ++i)
+        prod *= vec[i];
+    return prod;
+}
+
 /**
  * @brief Return ComputeLibrary DataType that corresponds to the given precision
  * @param precision precision to be converted
@@ -36,8 +68,8 @@ inline arm_compute::DataType precisionToAclDataType(InferenceEngine::Precision p
 inline arm_compute::DataLayout getAclDataLayoutByMemoryDesc(MemoryDescCPtr desc) {
     if (desc->hasLayoutType(LayoutType::ncsp)) {
         if (desc->getShape().getRank() == 4) return arm_compute::DataLayout::NCHW;
-    if (desc->getShape().getRank() == 5) return arm_compute::DataLayout::NCDHW;
-    } else if(desc->hasLayoutType(LayoutType::nspc)) {
+        if (desc->getShape().getRank() == 5) return arm_compute::DataLayout::NCDHW;
+    } else if (desc->hasLayoutType(LayoutType::nspc)) {
         if (desc->getShape().getRank() == 4) return arm_compute::DataLayout::NHWC;
         if (desc->getShape().getRank() == 5) return arm_compute::DataLayout::NDHWC;
     }
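To make the dimension-order conversion concrete, here is a standalone illustration of the three helpers above, with the values worked out by hand:

```cpp
// OpenVINO dims are outermost-first (N, C, H, W); ACL indexes dimensions
// innermost-first, so shapeCast() mirrors the order.
VectorDims dims = {2, 3, 4, 5};                       // N=2, C=3, H=4, W=5
arm_compute::TensorShape aclShape = shapeCast(dims);
// aclShape[0] == 5 (W), aclShape[1] == 4 (H),
// aclShape[2] == 3 (C), aclShape[3] == 2 (N)

// Axis indices mirror the same way: logical axis 1 (C) of a 4D shape
// becomes ACL axis 4 - 1 - 1 == 2.
std::size_t aclAxis = axisCast(1, dims.size());

// vectorProduct() multiplies the first `size` dims: 2 * 3 * 4 * 5 == 120.
Dim total = vectorProduct(dims, dims.size());
```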