From da196784a4082852d689562fc87c290822316c83 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 2 Aug 2021 13:26:35 +0300 Subject: [PATCH] [SYCL] Fix initialization of interoperability memory objects (#4205) Fix an issue where the native memory object passed to interoperability memory object constructor was ignored on devices without host unified memory. --- .../include/CL/sycl/detail/sycl_mem_obj_t.hpp | 2 + .../source/detail/scheduler/graph_builder.cpp | 6 +-- sycl/source/detail/sycl_mem_obj_t.cpp | 2 + sycl/test/abi/sycl_symbols_linux.dump | 1 + .../scheduler/NoHostUnifiedMemory.cpp | 37 +++++++++++++++++++ 5 files changed, 45 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp index 03165fbe68b4e..5d16eb6034bd8 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp @@ -302,6 +302,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { bool hasUserDataPtr() const { return MUserPtr != nullptr; }; + bool isInterop() const; + protected: // An allocateMem helper that determines which host ptr to use void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 0396da30769c2..7f8c9d4f4b884 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -677,12 +677,12 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // unnecessary copy on devices with unified host memory support. const bool HostUnifiedMemory = checkHostUnifiedMemory(Queue->getContextImplPtr()); - const bool InitFromUserData = - Record->MAllocaCommands.empty() && HostUnifiedMemory; - AllocaCommandBase *LinkedAllocaCmd = nullptr; // TODO casting is required here to get the necessary information // without breaking ABI, replace with the next major version. auto *MemObj = static_cast(Req->MSYCLMemObj); + const bool InitFromUserData = Record->MAllocaCommands.empty() && + (HostUnifiedMemory || MemObj->isInterop()); + AllocaCommandBase *LinkedAllocaCmd = nullptr; // For the first allocation on a device without host unified memory we // might need to also create a host alloca right away in order to perform diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index f54e5c5d61624..8a43345738216 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -100,6 +100,8 @@ size_t SYCLMemObjT::getBufSizeForContext(const ContextImplPtr &Context, return BufSize; } +bool SYCLMemObjT::isInterop() const { return MOpenCLInterop; } + void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData, void *&HostPtr, bool &HostPtrReadOnly) { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index eed93b8ca86f2..56bda0b5e7a6b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4041,6 +4041,7 @@ _ZNK2cl4sycl6detail10image_implILi3EE7getTypeEv _ZNK2cl4sycl6detail10image_implILi3EE9get_countEv _ZNK2cl4sycl6detail10image_implILi3EE9get_rangeEv _ZNK2cl4sycl6detail11SYCLMemObjT9getPluginEv +_ZNK2cl4sycl6detail11SYCLMemObjT9isInteropEv _ZNK2cl4sycl6detail11stream_impl22get_max_statement_sizeEv _ZNK2cl4sycl6detail11stream_impl8get_sizeEv _ZNK2cl4sycl6detail12sampler_impl18get_filtering_modeEv diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index ed66cc0c9f60f..d46b3cff9862b 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -58,8 +58,19 @@ static pi_result redefinedEnqueueMemBufferWriteRect( return PI_SUCCESS; } +static pi_result redefinedMemRetain(pi_mem mem) { return PI_SUCCESS; } static pi_result redefinedMemRelease(pi_mem mem) { return PI_SUCCESS; } +static pi_context InteropPiContext = nullptr; +static pi_result redefinedMemGetInfo(pi_mem mem, cl_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + EXPECT_EQ(param_name, static_cast(CL_MEM_CONTEXT)); + auto *Result = reinterpret_cast(param_value); + *Result = InteropPiContext; + return PI_SUCCESS; +} + TEST_F(SchedulerTest, NoHostUnifiedMemory) { platform Plt{default_selector()}; if (Plt.is_host()) { @@ -75,7 +86,9 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { redefinedEnqueueMemBufferReadRect); Mock.redefine( redefinedEnqueueMemBufferWriteRect); + Mock.redefine(redefinedMemRetain); Mock.redefine(redefinedMemRelease); + Mock.redefine(redefinedMemGetInfo); cl::sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q); device HostDevice; @@ -185,4 +198,28 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // The current context for the record should still be modified. EXPECT_EQ(Record->MCurContext, DefaultHostQueue->getContextImplPtr()); } + // Check that interoperability memory objects are initialized. + { + cl_mem MockInteropBuffer = reinterpret_cast(1); + context InteropContext = Q.get_context(); + InteropPiContext = detail::getSyclObjImpl(InteropContext)->getHandleRef(); + std::shared_ptr BufI = std::make_shared< + detail::buffer_impl>( + MockInteropBuffer, Q.get_context(), /*BufSize*/ 8, + make_unique_ptr>(), + event()); + + detail::Requirement Req = getMockRequirement(); + Req.MSYCLMemObj = BufI.get(); + std::vector AuxCmds; + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); + detail::AllocaCommandBase *InteropAlloca = + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); + detail::EnqueueResultT Res; + MockScheduler::enqueueCommand(InteropAlloca, Res, detail::BLOCKING); + + EXPECT_EQ(Record->MAllocaCommands.size(), 1U); + EXPECT_EQ(InteropAlloca->MMemAllocation, MockInteropBuffer); + } }