Skip to content

Commit

Permalink
[ARM CPU] Update TBB ACL Scheduler (openvinotoolkit#18885)
Browse files Browse the repository at this point in the history
  • Loading branch information
allnes authored Oct 13, 2023
1 parent 5170350 commit b7b5d4c
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ namespace intel_cpu {

using namespace arm_compute;

// Hands out the one mutex shared by every caller in this translation unit.
// The mutex is a function-local static, so it is constructed on first use
// and the same object is returned on every call.
static std::mutex & get_mtx_ifunc() {
    static std::mutex shared_mutex;
    std::mutex &mutex_ref = shared_mutex;
    return mutex_ref;
}

inline VectorDims reshape_sizes(VectorDims dims) {
const size_t MAX_NUM_SHAPE = arm_compute::MAX_DIMS;
VectorDims result_dims(MAX_NUM_SHAPE - 1);
Expand Down Expand Up @@ -494,6 +499,11 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
default:
IE_THROW() << "Unsupported operation type for ACL Eltwise executor: " << static_cast<int>(aclEltwiseAttrs.algorithm);
}

// We get a problem (seg. faults, data race etc) for eltwise operations when we use several configure(...) functions in parallel.
// We created issue about this problem here: https://github.com/ARM-software/ComputeLibrary/issues/1073
// TODO: change it when we will get an answer to our question in issue
std::lock_guard<std::mutex> _lock {get_mtx_ifunc()};
ifunc = exec_func();
return true;
}
Expand Down
77 changes: 77 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright (C) 2020-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_ie_scheduler.hpp"

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include <ie_parallel.hpp>

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

// Out-of-line defaulted constructor: no scheduler-specific setup is performed here.
ACLScheduler::ACLScheduler() = default;

// Reports the thread count as given by parallel_get_num_threads(),
// converted to the unsigned type required by the scheduler interface.
unsigned int ACLScheduler::num_threads() const {
    const auto thread_count = parallel_get_num_threads();
    return static_cast<unsigned int>(thread_count);
}

// No-op: the requested value is ignored and nothing is stored.
void ACLScheduler::set_num_threads(unsigned int /*num_threads*/) {
}

/**
 * Runs @p kernel over @p window, either on the calling thread or split across
 * InferenceEngine::parallel_for workers.
 *
 * @param kernel  Kernel to execute; run() is used when @p tensors is empty, run_op() otherwise.
 * @param hints   Scheduling hints; only split_dimension() is consulted.
 * @param window  Full execution window. Nothing is executed when it has no iterations.
 * @param tensors Tensor pack forwarded to run_op(); an empty pack selects run().
 *
 * The number of sub-windows is limited by the iterations available along the
 * split dimension, so no worker is handed an empty sub-window.
 */
void ACLScheduler::schedule_custom(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) {
    // Keep the full width of the count: narrowing it to 32 bits could turn a
    // very large, non-empty window into an apparent zero.
    const std::size_t total_iterations = window.num_iterations_total();
    if (total_iterations == 0) {
        return;
    }

    // Decide once which kernel entry point is used; every invocation below shares it.
    const bool has_tensors = !tensors.empty();
    const auto run_kernel = [&](const Window &win, const ThreadInfo &info) {
        if (has_tensors) {
            kernel->run_op(tensors, win, info);
        } else {
            kernel->run(win, info);
        }
    };

    // Executes the whole window on the calling thread with a default ThreadInfo.
    const auto run_serial = [&]() {
        ThreadInfo info;
        info.cpu_info = &cpu_info();
        run_kernel(window, info);
    };

    const std::size_t max_threads = std::min(total_iterations, static_cast<std::size_t>(parallel_get_num_threads()));
    const std::size_t split_dimension = hints.split_dimension();

    // A split dimension outside the window's rank cannot be used to slice the
    // window, so such requests are executed serially rather than being passed
    // to split_window().
    // NOTE(review): presumably this covers IScheduler::split_dimensions_all - confirm.
    const bool can_split = split_dimension < Coordinates::num_max_dimensions;
    if (!kernel->is_parallelisable() || max_threads <= 1 || !can_split) {
        run_serial();
        return;
    }

    // Never create more sub-windows than there are iterations along the split
    // dimension; the surplus ones would be empty.
    const auto num_windows =
        static_cast<unsigned int>(std::min(max_threads, static_cast<std::size_t>(window.num_iterations(split_dimension))));
    if (num_windows <= 1) {
        run_serial();
        return;
    }

    const int worker_count = static_cast<int>(num_windows);
    InferenceEngine::parallel_for(num_windows, [&](int wid) {
        Window win = window.split_window(split_dimension, wid, num_windows);
        win.validate();
        run_kernel(win, {wid, worker_count, &cpu_info()});
    });
}

// Schedules the kernel over its own window with an empty tensor pack,
// delegating the actual work to schedule_custom().
void ACLScheduler::schedule(ICPPKernel *kernel, const Hints &hints) {
    ITensorPack no_tensors;
    const Window &kernel_window = kernel->window();
    this->schedule_custom(kernel, hints, kernel_window, no_tensors);
}

// Schedules the kernel over the caller-supplied window and tensor pack;
// all arguments are forwarded unchanged to schedule_custom().
void ACLScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) {
    this->schedule_custom(kernel, hints, window, tensors);
}

// Invokes every workload once via InferenceEngine::parallel_for.
// Each workload receives a ThreadInfo built from its own index, the value of
// parallel_get_num_threads() and the scheduler's CPU info.
// NOTE(review): the thread id passed is the workload index, which could exceed
// the reported thread count if there are more workloads than threads - confirm
// callers never submit more workloads than num_threads().
void ACLScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> &workloads) {
    const auto workload_count = workloads.size();
    InferenceEngine::parallel_for(workload_count, [&](int wid) {
        const ThreadInfo info{wid, static_cast<int>(parallel_get_num_threads()), &cpu_info()};
        workloads[wid](info);
    });
}

} // namespace intel_cpu
} // namespace ov
31 changes: 31 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_ie_scheduler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (C) 2020-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <arm_compute/runtime/Scheduler.h>
#include <arm_compute/core/CPP/ICPPKernel.h>
#include <arm_compute/core/ITensorPack.h>
#include "support/Mutex.h"

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

// IScheduler implementation declared for the intel_cpu plugin.
// The class is final and declares no data members of its own.
class ACLScheduler final : public IScheduler {
public:
ACLScheduler();
~ACLScheduler() override = default;
// Number of threads this scheduler reports to its users.
std::uint32_t num_threads() const override;
// Request to change the thread count.
void set_num_threads(unsigned int num_threads) override;
// Runs a kernel using the given scheduling hints.
void schedule(ICPPKernel *kernel, const Hints &hints) override;
// Runs a kernel over an explicit window with an explicit tensor pack.
void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) override;
protected:
// Executes a batch of workloads.
void run_workloads(std::vector<Workload> &workloads) override;
private:
// Private helper taking the same arguments as schedule_op().
void schedule_custom(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);
};
} // namespace intel_cpu
} // namespace ov
38 changes: 38 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
#include <cpu/x64/cpu_isa_traits.hpp>
#include <itt.h>

#if defined(OV_CPU_WITH_ACL)
#include "nodes/executors/acl/acl_ie_scheduler.hpp"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
#endif

using namespace InferenceEngine;

#define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: "
Expand Down Expand Up @@ -137,11 +142,44 @@ class CPUSpecialSetup {
};
#endif // __linux__

#if defined(OV_CPU_WITH_ACL)
// Out-of-class definitions of SchedulerGuard's static data members.
// Locked by SchedulerGuard::instance() while it looks up or creates the guard.
std::mutex Engine::SchedulerGuard::mutex;
// Non-owning reference to the most recently created guard; it expires once the
// last shared_ptr to that guard is released.
std::weak_ptr<Engine::SchedulerGuard> Engine::SchedulerGuard::ptr;

// Selects the scheduler used by the Arm Compute Library:
//  - when the threading backend is anything other than IE_THREAD_SEQ, a fresh
//    ACLScheduler instance is installed;
//  - in the sequential build, the library's single-threaded (ST) scheduler is
//    selected instead.
Engine::SchedulerGuard::SchedulerGuard() {
#if IE_THREAD != IE_THREAD_SEQ
    const auto acl_scheduler = std::make_shared<ACLScheduler>();
    arm_compute::Scheduler::set(acl_scheduler);
#else
    // Sequential build: keep ACL in single-thread mode.
    arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST);
#endif
}

std::shared_ptr<Engine::SchedulerGuard> Engine::SchedulerGuard::instance() {
std::lock_guard<std::mutex> lock{SchedulerGuard::mutex};
auto scheduler_guard_ptr = SchedulerGuard::ptr.lock();
if (scheduler_guard_ptr == nullptr) {
SchedulerGuard::ptr = scheduler_guard_ptr = std::make_shared<SchedulerGuard>();
}
return scheduler_guard_ptr;
}

// Switches ACL back to its single-threaded (ST) scheduler when the guard dies,
// i.e. after the ACLScheduler has been in use.
// TODO: find out why the scheduler state has to be reset here.
// NOTE(review): dest_mutex is a per-instance member, so this lock does not
// serialize against SchedulerGuard::instance() - confirm that is intended.
Engine::SchedulerGuard::~SchedulerGuard() {
    std::lock_guard<std::mutex> reset_lock(dest_mutex);
    arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST);
}
#endif

// Builds the CPU plugin engine: caches the device's full name, allocates the
// CPUSpecialSetup object, names the plugin "CPU" and registers the plugin's
// Extension with the extension manager. In builds with ACL enabled it also
// takes a shared reference to the scheduler guard.
Engine::Engine()
    : deviceFullName(getDeviceFullName()),
      specialSetup(new CPUSpecialSetup) {
    _pluginName = "CPU";

    const auto cpu_extension = std::make_shared<Extension>();
    extensionManager->AddExtension(cpu_extension);

#ifdef OV_CPU_WITH_ACL
    // Held for the engine's lifetime.
    scheduler_guard = SchedulerGuard::instance();
#endif
}

Engine::~Engine() {
Expand Down
14 changes: 14 additions & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ class Engine : public InferenceEngine::IInferencePlugin {
const std::string deviceFullName;

std::shared_ptr<void> specialSetup;

#if defined(OV_CPU_WITH_ACL)
// Shared guard object handed out through instance().
// Holders keep it alive via shared_ptr; only a weak reference is stored statically.
struct SchedulerGuard {
SchedulerGuard();
~SchedulerGuard();
// Returns a shared reference to a guard.
static std::shared_ptr<SchedulerGuard> instance();
// Mutex shared by all guards (static).
static std::mutex mutex;
// separate mutex for saving ACLScheduler state in destructor
// NOTE(review): this one is per-instance, unlike `mutex` above.
mutable std::mutex dest_mutex;
// Static weak reference to a guard; does not keep it alive.
static std::weak_ptr<SchedulerGuard> ptr;
};

std::shared_ptr<SchedulerGuard> scheduler_guard;
#endif
};

} // namespace intel_cpu
Expand Down

0 comments on commit b7b5d4c

Please sign in to comment.