forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Reduce executors (openvinotoolkit#98)
- Loading branch information
Showing
19 changed files
with
849 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
101 changes: 101 additions & 0 deletions
101
src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
// Copyright (C) 2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "acl_utils.hpp" | ||
#include "acl_reduce.hpp" | ||
|
||
namespace ov { | ||
namespace intel_cpu { | ||
|
||
using namespace arm_compute; | ||
|
||
/**
 * @brief Translates a plugin reduce algorithm into the matching ACL reduction operation.
 *
 * Only ReduceMax, ReduceMin, ReduceSum and ReduceProd have a mapping here.
 *
 * @param algorithm Reduce algorithm requested by the node.
 * @return The corresponding arm_compute::ReductionOperation value.
 * @throws Whatever IE_THROW() raises when the algorithm has no mapping.
 */
arm_compute::ReductionOperation getAclReductionOperationByAlgorithm(Algorithm algorithm) {
    if (algorithm == Algorithm::ReduceMax) {
        return arm_compute::ReductionOperation::MAX;
    }
    if (algorithm == Algorithm::ReduceMin) {
        return arm_compute::ReductionOperation::MIN;
    }
    if (algorithm == Algorithm::ReduceSum) {
        return arm_compute::ReductionOperation::SUM;
    }
    if (algorithm == Algorithm::ReduceProd) {
        return arm_compute::ReductionOperation::PROD;
    }
    // Anything else (including ReduceMean) is not representable as a ReductionOperation here.
    IE_THROW() << "Unsupported reduction operation: " << static_cast<int>(algorithm);
}
|
||
/**
 * @brief Constructs the executor; the context is simply forwarded to the ReduceExecutor base.
 * @param context Executor context passed through to the base class.
 */
AclReduceExecutor::AclReduceExecutor(const ExecutorContext::CPtr context)
    : ReduceExecutor(context) {
}
|
||
bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, | ||
const std::vector<MemoryDescPtr>& srcDescs, | ||
const std::vector<MemoryDescPtr>& dstDescs, | ||
const dnnl::primitive_attr &attr) { | ||
if (reduceAttrs.operation != Algorithm::ReduceMax && | ||
reduceAttrs.operation != Algorithm::ReduceMin && | ||
reduceAttrs.operation != Algorithm::ReduceSum && | ||
reduceAttrs.operation != Algorithm::ReduceProd && | ||
reduceAttrs.operation != Algorithm::ReduceMean) { | ||
return false; | ||
} | ||
|
||
this->reduceAttrs = reduceAttrs; | ||
|
||
auto srcDims = srcDescs[0]->getShape().getStaticDims(); | ||
auto dstDims = dstDescs[0]->getShape().getStaticDims(); | ||
|
||
TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1, | ||
precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0])); | ||
TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1, | ||
precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0])); | ||
|
||
srcTensor.allocator()->init(srcTensorInfo); | ||
dstTensor.allocator()->init(dstTensorInfo); | ||
|
||
switch (reduceAttrs.operation) { | ||
case Algorithm::ReduceMean: | ||
for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) { | ||
auto pos = axisCast(i, reduceAttrs.axes.size()); | ||
axesMean.set(pos, reduceAttrs.axes[i]); | ||
} | ||
if (!arm_compute::NEReduceMean::validate(&srcTensorInfo, axesMean, reduceAttrs.keepDims, &dstTensorInfo)) { | ||
return false; | ||
} | ||
exec_func = [this]{ | ||
auto acl_op = std::make_unique<arm_compute::NEReduceMean>(); | ||
acl_op->configure(&srcTensor, axesMean, this->reduceAttrs.keepDims, &dstTensor); | ||
acl_op->run(); | ||
}; | ||
break; | ||
case Algorithm::ReduceMax: | ||
case Algorithm::ReduceMin: | ||
case Algorithm::ReduceSum: | ||
case Algorithm::ReduceProd: | ||
if (reduceAttrs.axes.size() != 1) { | ||
return false; | ||
} | ||
if (!arm_compute::NEReductionOperation::validate(&srcTensorInfo, &dstTensorInfo, axisCast(reduceAttrs.axes[0], srcDims.size()), | ||
getAclReductionOperationByAlgorithm(reduceAttrs.operation), reduceAttrs.keepDims)) { | ||
return false; | ||
} | ||
exec_func = [this, srcDims]{ | ||
auto acl_op = std::make_unique<arm_compute::NEReductionOperation>(); | ||
acl_op->configure(&srcTensor, &dstTensor, axisCast(this->reduceAttrs.axes[0], srcDims.size()), | ||
getAclReductionOperationByAlgorithm(this->reduceAttrs.operation), this->reduceAttrs.keepDims); | ||
acl_op->run(); | ||
}; | ||
break; | ||
default: | ||
IE_THROW() << "Unsupported operation type for ACL Reduce executor: " << static_cast<int>(reduceAttrs.operation); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
/**
 * @brief Runs the reduction prepared by init() on the given memory objects.
 *
 * The pointers of src[0] and dst[0] are imported into the ACL tensors, the stored
 * closure is invoked, and both allocators are freed afterwards.
 *
 * @param src         Source memory objects; only src[0] is read.
 * @param dst         Destination memory objects; only dst[0] is written.
 * @param postOpsArgs Not used by this executor.
 */
void AclReduceExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, std::unordered_map<int, MemoryPtr> postOpsArgs) {
    auto* const inputAllocator = srcTensor.allocator();
    auto* const outputAllocator = dstTensor.allocator();

    // Attach the caller's buffers to the ACL tensors for the duration of this call.
    inputAllocator->import_memory(src[0]->GetPtr());
    outputAllocator->import_memory(dst[0]->GetPtr());

    exec_func();

    inputAllocator->free();
    outputAllocator->free();
}
|
||
} // namespace intel_cpu | ||
} // namespace ov |
60 changes: 60 additions & 0 deletions
60
src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright (C) 2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
// TODO: remove relative path | ||
#include "../reduce.hpp" | ||
#include "arm_compute/runtime/NEON/NEFunctions.h" | ||
|
||
namespace ov { | ||
namespace intel_cpu { | ||
|
||
class AclReduceExecutor : public ReduceExecutor { | ||
public: | ||
AclReduceExecutor(const ExecutorContext::CPtr context); | ||
|
||
bool init(const ReduceAttrs& reduceAttrs, | ||
const std::vector<MemoryDescPtr>& srcDescs, | ||
const std::vector<MemoryDescPtr>& dstDescs, | ||
const dnnl::primitive_attr &attr) override; | ||
void exec(const std::vector<MemoryCPtr>& src, | ||
const std::vector<MemoryPtr>& dst, | ||
std::unordered_map<int, MemoryPtr> postOpsArgs) override; | ||
|
||
impl_desc_type getImplType() const override { | ||
return implType; | ||
} | ||
|
||
private: | ||
std::function<void()> exec_func; | ||
ReduceAttrs reduceAttrs; | ||
impl_desc_type implType = impl_desc_type::acl; | ||
|
||
arm_compute::Coordinates axesMean; | ||
arm_compute::Tensor srcTensor; | ||
arm_compute::Tensor dstTensor; | ||
}; | ||
|
||
class AclReduceExecutorBuilder : public ReduceExecutorBuilder { | ||
public: | ||
bool isSupported(const ReduceAttrs& reduceAttrs, | ||
const std::vector<MemoryDescPtr>& srcDescs, | ||
const std::vector<MemoryDescPtr>& dstDescs) const override { | ||
if (srcDescs[0]->getPrecision() != dstDescs[0]->getPrecision() || | ||
(srcDescs[0]->getPrecision() != InferenceEngine::Precision::FP32 && | ||
dstDescs[0]->getPrecision() != InferenceEngine::Precision::FP16 && | ||
dstDescs[0]->getPrecision() != InferenceEngine::Precision::I32)) | ||
return false; | ||
|
||
return true; | ||
} | ||
|
||
ReduceExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override { | ||
return std::make_shared<AclReduceExecutor>(context); | ||
} | ||
}; | ||
|
||
} // namespace intel_cpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.