Add 'USE_CUDA_GRAPH' Configuration Option (openvinotoolkit#648)
* [NVIDIA][CUDA Graphs] Rename CudaGraph class to ExecGraph

* [NVIDIA][CUDA Graphs] Add 'USE_CUDA_GRAPH' config option

---------

Co-authored-by: Nadezhda Ageeva <[email protected]>
apavliuk55 and nkogteva authored May 4, 2023
1 parent b2147a3 commit 066a42c
Showing 25 changed files with 59 additions and 39 deletions.
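For context, the sketch below shows how the new option could be passed to the NVIDIA plugin when compiling a model. This is a minimal illustration assuming the standard OpenVINO 2.0 C++ API; the key "NVIDIA_USE_CUDA_GRAPH" is the one registered in this commit, while the model path, the "NVIDIA" device name string, and the "YES"/"NO" value format are assumptions made for the example.

// Minimal sketch (not from this commit): toggling the internal
// "NVIDIA_USE_CUDA_GRAPH" option when compiling a model.
// Assumptions: standard OpenVINO 2.0 C++ API; "model.xml", the "NVIDIA"
// device name and the "YES"/"NO" string values are placeholders.
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");

    // The option defaults to enabled (use_cuda_graph = true in Configuration),
    // so it only needs to be passed explicitly to turn CUDA Graphs off.
    ov::AnyMap config{{"NVIDIA_USE_CUDA_GRAPH", "NO"}};
    auto compiled = core.compile_model(model, "NVIDIA", config);

    auto request = compiled.create_infer_request();
    request.infer();
    return 0;
}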
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/include/nvidia/properties.hpp
@@ -20,7 +20,7 @@ namespace ov {
namespace nvidia_gpu {

/**
* @brief Defines if optimization should be run for CUDA libraries
* @brief Defines if benchmarks should be run to determine fastest algorithms for some operations (e.g. Convolution)
*/
static constexpr Property<bool, PropertyMutability::RW> operation_benchmark{"NVIDIA_OPERATION_BENCHMARK"};

12 changes: 12 additions & 0 deletions modules/nvidia_plugin/src/cuda_config.cpp
@@ -14,6 +14,14 @@

using namespace ov::nvidia_gpu;

namespace internal {
/**
* @brief Defines if NVIDIA Plugin should use CUDA graphs for performance acceleration
*/
static constexpr ov::Property<bool, ov::PropertyMutability::RW> use_cuda_graph{"NVIDIA_USE_CUDA_GRAPH"};

} // namespace internal

Configuration::Configuration() {}

std::vector<ov::PropertyName> Configuration::get_ro_properties() {
@@ -164,6 +172,8 @@ Configuration::Configuration(const ConfigMap& config, const Configuration& defau
streams_executor_config_.SetConfig(key, value);
} else if (ov::nvidia_gpu::operation_benchmark == key || NVIDIA_CONFIG_KEY(OPERATION_BENCHMARK) == key) {
operation_benchmark = ov::util::from_string(value, ov::nvidia_gpu::operation_benchmark);
} else if (internal::use_cuda_graph == key) {
use_cuda_graph = ov::util::from_string(value, internal::use_cuda_graph);
} else if (ov::enable_profiling == key) {
is_profiling_enabled = ov::util::from_string(value, ov::enable_profiling);
} else if (ov::hint::num_requests == key) {
@@ -198,6 +208,8 @@ InferenceEngine::Parameter Configuration::Get(const std::string& name) const {
return is_profiling_enabled;
} else if (name == ov::nvidia_gpu::operation_benchmark || name == NVIDIA_CONFIG_KEY(OPERATION_BENCHMARK)) {
return operation_benchmark;
} else if (name == internal::use_cuda_graph) {
return use_cuda_graph;
} else if (name == ov::num_streams) {
return (num_streams == 0) ?
ov::streams::Num(get_optimal_number_of_streams()) : num_streams;
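For readers unfamiliar with the feature this property gates: CUDA Graphs let a fixed sequence of kernel launches be captured once and replayed with a single launch call, cutting per-kernel CPU launch overhead. The sketch below shows the generic capture/replay pattern using the plain CUDA Runtime API; it is only an illustration of the technique, not the plugin's implementation (this commit adds the configuration flag only), and error checking is omitted.

// Generic CUDA Graphs capture/replay pattern (illustration only, not plugin code).
#include <cuda_runtime.h>
#include <cstddef>

// Stand-in for the real network kernels: a trivial capturable async operation.
static void enqueue_all_kernels(cudaStream_t stream, void* buf, std::size_t size) {
    cudaMemsetAsync(buf, 0, size, stream);
}

void run_with_optional_graph(cudaStream_t stream, void* buf, std::size_t size, bool use_cuda_graph) {
    if (!use_cuda_graph) {
        enqueue_all_kernels(stream, buf, size);  // plain per-kernel stream execution
        cudaStreamSynchronize(stream);
        return;
    }
    cudaGraph_t graph = nullptr;
    cudaGraphExec_t graph_exec = nullptr;

    // Record everything enqueued on `stream` between Begin/End into a graph.
    cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
    enqueue_all_kernels(stream, buf, size);
    cudaStreamEndCapture(stream, &graph);

    // Instantiate once, then replay with a single launch per inference.
    cudaGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0);
    cudaGraphLaunch(graph_exec, stream);
    cudaStreamSynchronize(stream);

    cudaGraphExecDestroy(graph_exec);
    cudaGraphDestroy(graph);
}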
2 changes: 2 additions & 0 deletions modules/nvidia_plugin/src/cuda_config.hpp
@@ -45,9 +45,11 @@ struct Configuration {
static constexpr uint32_t reasonable_limit_of_streams = 10;
int deviceId = 0;
InferenceEngine::IStreamsExecutor::Config streams_executor_config_;

private:
bool is_profiling_enabled = false;
bool operation_benchmark = false;
bool use_cuda_graph = true;
uint32_t hint_num_requests = 0;
ov::streams::Num num_streams = 0;
ov::hint::PerformanceMode performance_mode = ov::hint::PerformanceMode::LATENCY;
14 changes: 6 additions & 8 deletions modules/nvidia_plugin/src/cuda_executable_network.cpp
@@ -120,8 +120,7 @@ void ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Funct
// Apply common transformations
transformer.common_transform(device, function_, inputInfoMap, outputsInfoMap, cfg_);
// Clone model and additionally apply export specific transformations
export_function_ =
transformer.clone_and_export_transform(device, function_, inputInfoMap, outputsInfoMap, cfg_);
export_function_ = transformer.clone_and_export_transform(device, function_, inputInfoMap, outputsInfoMap, cfg_);
// CUDA-specific tranformations
transformer.cuda_transform(device, function_, cfg_);
// Generate backend specific blob mappings. For example Inference Engine uses not ov::Result nodes friendly name
@@ -142,7 +141,7 @@ void ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Funct
const bool opBenchOption = cfg_.Get(NVIDIA_CONFIG_KEY(OPERATION_BENCHMARK)).as<bool>();
const auto creationContext = CreationContext{device, opBenchOption};

graph_ = std::make_unique<CudaGraph>(creationContext, function_);
graph_ = std::make_unique<ExecGraph>(creationContext, function_);

memory_pool_ = CreateMemoryPool();
}
@@ -343,7 +342,8 @@ InferenceEngine::Parameter ExecutableNetwork::GetMetric(const std::string& name)
supported_properties.push_back(ov::PropertyName(ov::supported_properties.name(), PropertyMutability::RO));
supported_properties.push_back(ov::PropertyName(ov::model_name.name(), PropertyMutability::RO));
supported_properties.push_back(ov::PropertyName(ov::execution_devices.name(), PropertyMutability::RO));
supported_properties.push_back(ov::PropertyName(ov::optimal_number_of_infer_requests.name(), PropertyMutability::RO));
supported_properties.push_back(
ov::PropertyName(ov::optimal_number_of_infer_requests.name(), PropertyMutability::RO));
auto config_properties = cfg_.get_rw_properties();
supported_properties.insert(supported_properties.end(), config_properties.begin(), config_properties.end());
return decltype(ov::supported_properties)::value_type{supported_properties};
@@ -365,14 +365,12 @@ InferenceEngine::Parameter ExecutableNetwork::GetMetric(const std::string& name)
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (ov::model_name == name || EXEC_NETWORK_METRIC_KEY(NETWORK_NAME) == name) {
auto networkName = export_function_->get_friendly_name();
if (is_new_api)
return decltype(ov::model_name)::value_type{networkName};
if (is_new_api) return decltype(ov::model_name)::value_type{networkName};
IE_SET_METRIC_RETURN(NETWORK_NAME, networkName);
} else if (ov::optimal_number_of_infer_requests == name ||
EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) == name) {
const unsigned value = memory_pool_->Size();
if (is_new_api)
return decltype(ov::optimal_number_of_infer_requests)::value_type{value};
if (is_new_api) return decltype(ov::optimal_number_of_infer_requests)::value_type{value};
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, value);
} else if (ov::execution_devices == name) {
return decltype(ov::execution_devices)::value_type{plugin_->GetName() + "." + std::to_string(cfg_.deviceId)};
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/cuda_executable_network.hpp
@@ -89,7 +89,7 @@ class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDef
std::shared_ptr<ngraph::Function> function_;
std::map<std::string, std::size_t> input_index_;
std::map<std::string, std::size_t> output_index_;
std::unique_ptr<CudaGraph> graph_;
std::unique_ptr<ExecGraph> graph_;
std::shared_ptr<MemoryPool> memory_pool_;
};

4 changes: 2 additions & 2 deletions modules/nvidia_plugin/src/cuda_graph.cpp
@@ -7,10 +7,10 @@
namespace ov {
namespace nvidia_gpu {

CudaGraph::CudaGraph(const CreationContext& context, const std::shared_ptr<const ngraph::Function>& function)
ExecGraph::ExecGraph(const CreationContext& context, const std::shared_ptr<const ngraph::Function>& function)
: SubGraph(context, function) {}

void CudaGraph::Run(const InferenceRequestContext& context, const DeviceMemBlock& memoryBlock) const {
void ExecGraph::Run(const InferenceRequestContext& context, const DeviceMemBlock& memoryBlock) const {
Workbuffers workbuffers{};
workbuffers.mutable_buffers.emplace_back(memoryBlock.view().data());
SubGraph::Execute(context, {}, {}, workbuffers);
6 changes: 3 additions & 3 deletions modules/nvidia_plugin/src/cuda_graph.hpp
@@ -11,12 +11,12 @@ class ExecNetworkTest;
namespace ov {
namespace nvidia_gpu {

class CudaGraph final : public SubGraph {
class ExecGraph final : public SubGraph {
public:
friend class ::ExecNetworkTest;

CudaGraph(const CreationContext& context, const std::shared_ptr<const ngraph::Function>& function);
~CudaGraph() override = default;
ExecGraph(const CreationContext& context, const std::shared_ptr<const ngraph::Function>& function);
~ExecGraph() override = default;

void Run(const InferenceRequestContext& context, const DeviceMemBlock& memoryBlock) const;
};
10 changes: 7 additions & 3 deletions modules/nvidia_plugin/src/cuda_infer_request.cpp
@@ -25,6 +25,7 @@
#include "cuda_plugin.hpp"
#include "ie_ngraph_utils.hpp"
#include "ngraph/util.hpp"
#include "nvidia/properties.hpp"

using namespace InferenceEngine;

@@ -92,7 +93,8 @@ CudaInferRequest::CudaInferRequest(const std::vector<std::shared_ptr<const ov::N
_executableNetwork(executableNetwork),
cancellation_token_{[this] { memory_proxy_.reset(); }},
profiler_{_executableNetwork->GetConfig(ov::enable_profiling.name()).as<bool>(), *_executableNetwork->graph_},
is_benchmark_mode_{isBenchmarkMode} {
is_benchmark_mode_{isBenchmarkMode},
use_cuda_graph_{_executableNetwork->GetConfig("NVIDIA_USE_CUDA_GRAPH").as<bool>()} {
this->setPointerToExecutableNetworkInternal(executableNetwork);
createInferRequest();
}
@@ -105,7 +107,8 @@ CudaInferRequest::CudaInferRequest(const InferenceEngine::InputsDataMap& network
_executableNetwork(executableNetwork),
cancellation_token_{[this] { memory_proxy_.reset(); }},
profiler_{_executableNetwork->GetConfig(ov::enable_profiling.name()).as<bool>(), *_executableNetwork->graph_},
is_benchmark_mode_{isBenchmarkMode} {
is_benchmark_mode_{isBenchmarkMode},
use_cuda_graph_{_executableNetwork->GetConfig("NVIDIA_USE_CUDA_GRAPH").as<bool>()} {
this->setPointerToExecutableNetworkInternal(executableNetwork);
createInferRequest();
}
@@ -283,7 +286,8 @@ void CudaInferRequest::startPipeline(const ThreadContext& threadContext) {
threadContext,
cancellation_token_,
profiler_,
is_benchmark_mode_};
is_benchmark_mode_,
use_cuda_graph_};
graph.Run(inferRequestContext, memory);
profiler_.StopStage(Profiler::StartPipeline);
} catch (...) {
1 change: 1 addition & 0 deletions modules/nvidia_plugin/src/cuda_infer_request.hpp
@@ -105,6 +105,7 @@ class CudaInferRequest : public InferenceEngine::IInferRequestInternal {
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> input_tensors_;
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> output_tensors_;
bool is_benchmark_mode_;
bool use_cuda_graph_;
};
// ! [infer_request:header]

8 changes: 6 additions & 2 deletions modules/nvidia_plugin/src/cuda_inference_request_context.hpp
@@ -33,15 +33,17 @@ class InferenceRequestContext {
const ThreadContext& threadContext,
CancellationToken& token,
Profiler& profiler,
bool isBenchmarkMode = false)
bool isBenchmarkMode = false,
bool useCudaGraph = true)
: threadContext{threadContext},
token{token},
profiler{profiler},
blob_inputs{inputs},
inputs_mapping{inputMapping},
blob_outputs{outputs},
outputs_mapping{outputMapping},
is_benchmark_mode_{isBenchmarkMode} {}
is_benchmark_mode_{isBenchmarkMode},
use_cuda_graph_{useCudaGraph} {}
// don't allow storing references to temporary
template <typename... Args>
InferenceRequestContext(InferenceEngine::BlobMap&& inputs, Args... args) = delete;
@@ -85,6 +87,7 @@ class InferenceRequestContext {
[[nodiscard]] ov::nvidia_gpu::CancellationToken& getCancellationToken() const noexcept { return token; }
[[nodiscard]] Profiler& getProfiler() const noexcept { return profiler; }
[[nodiscard]] bool isBenchmarkMode() const noexcept { return is_benchmark_mode_; }
[[nodiscard]] bool useCudaGraph() const noexcept { return use_cuda_graph_; }

private:
const ThreadContext& threadContext;
@@ -95,6 +98,7 @@ class InferenceRequestContext {
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& blob_outputs;
const std::map<std::string, std::size_t>& outputs_mapping;
bool is_benchmark_mode_;
bool use_cuda_graph_;
};

} // namespace nvidia_gpu
@@ -2669,7 +2669,7 @@ struct ClampBenchmark : testing::Test {
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> emptyTensor;
std::map<std::string, std::size_t> emptyMapping;
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
ov::nvidia_gpu::InferenceRequestContext context{
emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
@@ -2696,7 +2696,7 @@ struct ClampBenchmark : testing::Test {
wbRequest.immutable_sizes[1] == minMaxSizeBytes);

ov::nvidia_gpu::IOperationExec::Buffers initBuffers{static_cast<DevPtr>(maxAlloc),
static_cast<DevPtr>(minAlloc)};
static_cast<DevPtr>(minAlloc)};
operation->InitSharedImmutableWorkbuffers(initBuffers);

workbuffers.immutable_buffers.emplace_back(static_cast<CDevPtr>(maxAlloc));
@@ -358,7 +358,6 @@ INSTANTIATE_TEST_CASE_P(smoke_Gather_v8_12,
::testing::Values(smoke_12_ov_params_v8.device_)),
Gather8LayerTest::getTestCaseName);


// ------------- Tacotron2 shapes -------------
const GatherTestParams tacotron2_enc_params_v1_v7 = {{148, 512}, {1, 1000}};

@@ -691,7 +690,7 @@ void test_one_shape(const GatherTestParams& params, bool is_v7) {
std::vector<devptr_t> outputs{out_alloc};

ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> emptyTensor;
std::map<std::string, std::size_t> emptyMapping;
@@ -234,7 +234,7 @@ void testOneShape(const LSTMCellTestParams& params) {

std::vector<std::shared_ptr<ngraph::runtime::Tensor>> emptyTensor;
std::map<std::string, std::size_t> emptyMapping;
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::Profiler profiler{false, graph};
ov::nvidia_gpu::InferenceRequestContext context{
@@ -250,7 +250,7 @@ MATCHER_P(FloatNearPointwise, tol, "Out of range") {
TEST_P(CudaRangeLayerTest, CompareWithRefs) {
ASSERT_TRUE(outputSize > 0);
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> emptyTensor;
std::map<std::string, std::size_t> emptyMapping;
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/tests/unit/concat.cpp
@@ -66,7 +66,7 @@ struct ConcatTest : testing::Test {
auto concatOp = dynamic_cast<ConcatOp*>(operation.get());
ASSERT_TRUE(concatOp);
CancellationToken token{};
CudaGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ExecGraph graph{CreationContext{CUDA::Device{}, false}, {}};
Profiler profiler{false, graph};
InferenceRequestContext context{
emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/tests/unit/convert_benchmark.cpp
@@ -42,7 +42,7 @@ TEST_F(ConvertTest, DISABLED_benchmark) {
constexpr int kNumAttempts = 200;

auto& stream = threadContext.stream();
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::Profiler profiler{false, graph};
ov::nvidia_gpu::InferenceRequestContext context{
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/tests/unit/limits.cpp
@@ -150,7 +150,7 @@ void run_zero_div_test() {
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> emptyTensor;
std::map<std::string, std::size_t> emptyMapping;
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
ov::nvidia_gpu::InferenceRequestContext context{
emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/tests/unit/logical_not_benchmark.cpp
@@ -51,7 +51,7 @@ struct LogicalNotBenchmark : testing::Test {
TEST_F(LogicalNotBenchmark, DISABLED_benchmark) {
constexpr int kNumAttempts = 20;
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
ov::nvidia_gpu::InferenceRequestContext context{
emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/tests/unit/parameter.cpp
@@ -84,7 +84,7 @@ TEST_F(ParameterRegistryTest, GetOperationBuilder_Available) {

TEST_F(ParameterTest, canExecuteSync) {
CancellationToken token{};
CudaGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ExecGraph graph{CreationContext{CUDA::Device{}, false}, {}};
Profiler profiler{false, graph};
InferenceRequestContext context{blobs, blobsMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
auto& stream = context.getThreadContext().stream();
@@ -98,7 +98,7 @@ TEST_F(ParameterTest, canExecuteSync) {

TEST_F(ParameterTest, canExecuteAsync) {
CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
InferenceRequestContext context{blobs, blobsMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
auto& stream = context.getThreadContext().stream();
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/tests/unit/pooling_tests.cpp
@@ -90,7 +90,7 @@ struct PoolingTest : testing::Test {
CUDA::Device device{};
const bool optimizeOption = false;
CancellationToken token{};
CudaGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ExecGraph graph{CreationContext{CUDA::Device{}, false}, {}};
Profiler profiler{false, graph};
InferenceRequestContext context{
emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/tests/unit/relu.cpp
@@ -62,7 +62,7 @@ struct ReluTest : testing::Test {

TEST_F(ReluTest, canExecuteSync) {
ov::nvidia_gpu::CancellationToken token{};
ov::nvidia_gpu::CudaGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::ExecGraph graph{ov::nvidia_gpu::CreationContext{CUDA::Device{}, false}, {}};
ov::nvidia_gpu::Profiler profiler{false, graph};
ov::nvidia_gpu::InferenceRequestContext context{
emptyTensor, emptyMapping, emptyTensor, emptyMapping, threadContext, token, profiler};
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/tests/unit/result.cpp
@@ -102,7 +102,7 @@ TEST_F(ResultRegistryTest, GetOperationBuilder_Available) {

TEST_F(ResultTest, canExecuteSync) {
CancellationToken token{};
CudaGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ExecGraph graph{CreationContext{CUDA::Device{}, false}, {}};
Profiler profiler{false, graph};
InferenceRequestContext context{emptyTensor, emptyMapping, blobs, blobsMapping, threadContext, token, profiler};
auto mem = blob->as<MemoryBlob>()->rmap();
@@ -117,7 +117,7 @@ TEST_F(ResultTest, canExecuteSync) {

TEST_F(ResultTest, canExecuteAsync) {
CancellationToken token{};
CudaGraph graph{CreationContext{CUDA::Device{}, false}, {}};
ExecGraph graph{CreationContext{CUDA::Device{}, false}, {}};
Profiler profiler{false, graph};
InferenceRequestContext context{emptyTensor, emptyMapping, blobs, blobsMapping, threadContext, token, profiler};
auto& stream = context.getThreadContext().stream();