diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake index d9bbd10b7..6191cf2cf 100644 --- a/legate_core_cpp.cmake +++ b/legate_core_cpp.cmake @@ -205,8 +205,10 @@ list(APPEND legate_core_SOURCES src/core/runtime/projection.cc src/core/runtime/runtime.cc src/core/runtime/shard.cc + src/core/task/registrar.cc src/core/task/return.cc src/core/task/task.cc + src/core/task/variant.cc src/core/utilities/debug.cc src/core/utilities/deserializer.cc src/core/utilities/machine.cc @@ -355,6 +357,7 @@ install( install( FILES src/core/runtime/context.h + src/core/runtime/context.inl src/core/runtime/runtime.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/legate/core/runtime) @@ -362,6 +365,8 @@ install( FILES src/core/task/exception.h src/core/task/return.h src/core/task/task.h + src/core/task/task.inl + src/core/task/variant.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/legate/core/task) install( diff --git a/src/core/comm/coll.cc b/src/core/comm/coll.cc index 5f1a1f4e9..4bd76a758 100644 --- a/src/core/comm/coll.cc +++ b/src/core/comm/coll.cc @@ -35,7 +35,6 @@ namespace legate { namespace comm { namespace coll { -using namespace Legion; Logger log_coll("coll"); BackendNetwork* backend_network = nullptr; diff --git a/src/core/comm/comm_cpu.cc b/src/core/comm/comm_cpu.cc index 05c2f6283..7b0393f91 100644 --- a/src/core/comm/comm_cpu.cc +++ b/src/core/comm/comm_cpu.cc @@ -19,8 +19,6 @@ #include "core/comm/coll.h" -using namespace Legion; - namespace legate { namespace comm { namespace cpu { @@ -30,7 +28,7 @@ static int init_cpucoll_mapping(const Legion::Task* task, Legion::Context context, Legion::Runtime* runtime) { - Core::show_progress(task, context, runtime, task->get_task_name()); + Core::show_progress(task, context, runtime); int mpi_rank = 0; #if defined(LEGATE_USE_NETWORK) if (coll::backend_network->comm_type == coll::CollCommType::CollMPI) { @@ -46,7 +44,7 @@ static coll::CollComm init_cpucoll(const Legion::Task* task, Legion::Context context, Legion::Runtime* runtime) { - Core::show_progress(task, context, runtime, task->get_task_name()); + Core::show_progress(task, context, runtime); const int point = task->index_point[0]; int num_ranks = task->index_domain.get_volume(); @@ -80,7 +78,7 @@ static void finalize_cpucoll(const Legion::Task* task, Legion::Context context, Legion::Runtime* runtime) { - Core::show_progress(task, context, runtime, task->get_task_name()); + Core::show_progress(task, context, runtime); assert(task->futures.size() == 1); coll::CollComm comm = task->futures[0].get_result(); @@ -95,32 +93,29 @@ void register_tasks(Legion::Machine machine, Legion::Runtime* runtime, const LibraryContext& context) { - const InputArgs& command_args = Legion::Runtime::get_input_args(); - int argc = command_args.argc; - char** argv = command_args.argv; - coll::collInit(argc, argv); + const auto& command_args = Legion::Runtime::get_input_args(); + coll::collInit(command_args.argc, command_args.argv); - const TaskID init_cpucoll_mapping_task_id = - context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_MAPPING_TASK_ID); + auto init_cpucoll_mapping_task_id = context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_MAPPING_TASK_ID); const char* init_cpucoll_mapping_task_name = "core::comm::cpu::init_mapping"; runtime->attach_name(init_cpucoll_mapping_task_id, init_cpucoll_mapping_task_name, false /*mutable*/, true /*local only*/); - const TaskID init_cpucoll_task_id = context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_TASK_ID); + auto init_cpucoll_task_id = context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_TASK_ID); const char* init_cpucoll_task_name = "core::comm::cpu::init"; runtime->attach_name( init_cpucoll_task_id, init_cpucoll_task_name, false /*mutable*/, true /*local only*/); - const TaskID finalize_cpucoll_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_CPUCOLL_TASK_ID); + auto finalize_cpucoll_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_CPUCOLL_TASK_ID); const char* finalize_cpucoll_task_name = "core::comm::cpu::finalize"; runtime->attach_name( finalize_cpucoll_task_id, finalize_cpucoll_task_name, false /*mutable*/, true /*local only*/); auto make_registrar = [&](auto task_id, auto* task_name, auto proc_kind) { - TaskVariantRegistrar registrar(task_id, task_name); - registrar.add_constraint(ProcessorConstraint(proc_kind)); + Legion::TaskVariantRegistrar registrar(task_id, task_name); + registrar.add_constraint(Legion::ProcessorConstraint(proc_kind)); registrar.set_leaf(true); registrar.global_registration = false; return registrar; diff --git a/src/core/comm/comm_nccl.cu b/src/core/comm/comm_nccl.cu index 2e22b6e92..0f95fe74e 100644 --- a/src/core/comm/comm_nccl.cu +++ b/src/core/comm/comm_nccl.cu @@ -17,14 +17,14 @@ #include "core/comm/comm_nccl.h" #include "core/cuda/cuda_help.h" #include "core/cuda/stream_pool.h" +#include "core/data/buffer.h" #include "core/utilities/nvtx_help.h" +#include "core/utilities/typedefs.h" #include "legate.h" #include #include -using namespace Legion; - namespace legate { namespace comm { namespace nccl { @@ -59,7 +59,7 @@ static ncclUniqueId init_nccl_id(const Legion::Task* task, { legate::nvtx::Range auto_range("core::comm::nccl::init_id"); - Core::show_progress(task, context, runtime, task->get_task_name()); + Core::show_progress(task, context, runtime); ncclUniqueId id; CHECK_NCCL(ncclGetUniqueId(&id)); @@ -74,7 +74,7 @@ static ncclComm_t* init_nccl(const Legion::Task* task, { legate::nvtx::Range auto_range("core::comm::nccl::init"); - Core::show_progress(task, context, runtime, task->get_task_name()); + Core::show_progress(task, context, runtime); assert(task->futures.size() == 1); @@ -92,13 +92,8 @@ static ncclComm_t* init_nccl(const Legion::Task* task, // Perform a warm-up all-to-all - using namespace Legion; - - DeferredBuffer<_Payload, 1> src_buffer(Memory::GPU_FB_MEM, - Domain(Rect<1>{Point<1>{0}, Point<1>{num_ranks - 1}})); - - DeferredBuffer<_Payload, 1> tgt_buffer(Memory::GPU_FB_MEM, - Domain(Rect<1>{Point<1>{0}, Point<1>{num_ranks - 1}})); + auto src_buffer = create_buffer<_Payload>(num_ranks, Memory::Kind::GPU_FB_MEM); + auto tgt_buffer = create_buffer<_Payload>(num_ranks, Memory::Kind::GPU_FB_MEM); CHECK_NCCL(ncclGroupStart()); for (auto idx = 0; idx < num_ranks; ++idx) { @@ -119,7 +114,7 @@ static void finalize_nccl(const Legion::Task* task, { legate::nvtx::Range auto_range("core::comm::nccl::finalize"); - Core::show_progress(task, context, runtime, task->get_task_name()); + Core::show_progress(task, context, runtime); assert(task->futures.size() == 1); auto comm = task->futures[0].get_result(); @@ -131,24 +126,24 @@ void register_tasks(Legion::Machine machine, Legion::Runtime* runtime, const LibraryContext& context) { - const TaskID init_nccl_id_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_ID_TASK_ID); + auto init_nccl_id_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_ID_TASK_ID); const char* init_nccl_id_task_name = "core::comm::nccl::init_id"; runtime->attach_name( init_nccl_id_task_id, init_nccl_id_task_name, false /*mutable*/, true /*local only*/); - const TaskID init_nccl_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_TASK_ID); + auto init_nccl_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_TASK_ID); const char* init_nccl_task_name = "core::comm::nccl::init"; runtime->attach_name( init_nccl_task_id, init_nccl_task_name, false /*mutable*/, true /*local only*/); - const TaskID finalize_nccl_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_NCCL_TASK_ID); + auto finalize_nccl_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_NCCL_TASK_ID); const char* finalize_nccl_task_name = "core::comm::nccl::finalize"; runtime->attach_name( finalize_nccl_task_id, finalize_nccl_task_name, false /*mutable*/, true /*local only*/); auto make_registrar = [&](auto task_id, auto* task_name, auto proc_kind) { - TaskVariantRegistrar registrar(task_id, task_name); - registrar.add_constraint(ProcessorConstraint(proc_kind)); + Legion::TaskVariantRegistrar registrar(task_id, task_name); + registrar.add_constraint(Legion::ProcessorConstraint(proc_kind)); registrar.set_leaf(true); registrar.global_registration = false; return registrar; diff --git a/src/core/comm/local_comm.cc b/src/core/comm/local_comm.cc index 8adc4a2f3..29d317c38 100644 --- a/src/core/comm/local_comm.cc +++ b/src/core/comm/local_comm.cc @@ -27,7 +27,6 @@ namespace legate { namespace comm { namespace coll { -using namespace Legion; extern Logger log_coll; // public functions start from here @@ -348,4 +347,4 @@ void LocalNetwork::barrierLocal(CollComm global_comm) } // namespace coll } // namespace comm -} // namespace legate \ No newline at end of file +} // namespace legate diff --git a/src/core/comm/mpi_comm.cc b/src/core/comm/mpi_comm.cc index 1761701ff..114c82171 100644 --- a/src/core/comm/mpi_comm.cc +++ b/src/core/comm/mpi_comm.cc @@ -27,7 +27,6 @@ namespace legate { namespace comm { namespace coll { -using namespace Legion; extern Logger log_coll; enum CollTag : int { @@ -572,4 +571,4 @@ int MPINetwork::generateGatherTag(int rank, CollComm global_comm) } // namespace coll } // namespace comm -} // namespace legate \ No newline at end of file +} // namespace legate diff --git a/src/core/data/allocator.cc b/src/core/data/allocator.cc index 62051d05e..7f4512064 100644 --- a/src/core/data/allocator.cc +++ b/src/core/data/allocator.cc @@ -19,7 +19,7 @@ namespace legate { -ScopedAllocator::ScopedAllocator(Legion::Memory::Kind kind, bool scoped, size_t alignment) +ScopedAllocator::ScopedAllocator(Memory::Kind kind, bool scoped, size_t alignment) : target_kind_(kind), scoped_(scoped), alignment_(alignment) { } @@ -59,4 +59,4 @@ void ScopedAllocator::deallocate(void* ptr) buffer.destroy(); } -} // namespace legate \ No newline at end of file +} // namespace legate diff --git a/src/core/data/allocator.h b/src/core/data/allocator.h index 47d3c1a32..f6f057f7d 100644 --- a/src/core/data/allocator.h +++ b/src/core/data/allocator.h @@ -31,7 +31,7 @@ class ScopedAllocator { // Iff 'scoped', all allocations will be released upon destruction. // Otherwise this is up to the runtime after the task has finished. - ScopedAllocator(Legion::Memory::Kind kind, bool scoped = true, size_t alignment = 16); + ScopedAllocator(Memory::Kind kind, bool scoped = true, size_t alignment = 16); ~ScopedAllocator(); public: @@ -39,10 +39,10 @@ class ScopedAllocator { void deallocate(void* ptr); private: - Legion::Memory::Kind target_kind_{Legion::Memory::Kind::SYSTEM_MEM}; + Memory::Kind target_kind_{Memory::Kind::SYSTEM_MEM}; bool scoped_; size_t alignment_; std::unordered_map buffers_{}; }; -} // namespace legate \ No newline at end of file +} // namespace legate diff --git a/src/core/data/buffer.h b/src/core/data/buffer.h index 8c5a01b6c..90ec8d5c9 100644 --- a/src/core/data/buffer.h +++ b/src/core/data/buffer.h @@ -19,6 +19,7 @@ #include "legion.h" #include "core/utilities/machine.h" +#include "core/utilities/typedefs.h" namespace legate { @@ -43,11 +44,10 @@ using Buffer = Legion::DeferredBuffer; // after it's technically been deallocated. template -Buffer create_buffer(const Legion::Point& extents, - Legion::Memory::Kind kind = Legion::Memory::Kind::NO_MEMKIND, - size_t alignment = 16) +Buffer create_buffer(const Point& extents, + Memory::Kind kind = Memory::Kind::NO_MEMKIND, + size_t alignment = 16) { - using namespace Legion; if (Memory::Kind::NO_MEMKIND == kind) kind = find_memory_kind_for_executing_processor(false); auto hi = extents - Point::ONES(); // We just avoid creating empty buffers, as they cause all sorts of headaches. @@ -58,10 +58,10 @@ Buffer create_buffer(const Legion::Point& extents, template Buffer create_buffer(size_t size, - Legion::Memory::Kind kind = Legion::Memory::Kind::NO_MEMKIND, - size_t alignment = 16) + Memory::Kind kind = Memory::Kind::NO_MEMKIND, + size_t alignment = 16) { - return create_buffer(Legion::Point<1>(size), kind, alignment); + return create_buffer(Point<1>(size), kind, alignment); } } // namespace legate diff --git a/src/core/data/store.cc b/src/core/data/store.cc index c185ab602..bc7592c33 100644 --- a/src/core/data/store.cc +++ b/src/core/data/store.cc @@ -28,9 +28,7 @@ namespace legate { -using namespace Legion; - -RegionField::RegionField(int32_t dim, const PhysicalRegion& pr, FieldID fid) +RegionField::RegionField(int32_t dim, const Legion::PhysicalRegion& pr, Legion::FieldID fid) : dim_(dim), pr_(pr), fid_(fid) { auto priv = pr.get_privilege(); @@ -60,14 +58,18 @@ RegionField& RegionField::operator=(RegionField&& other) noexcept return *this; } -bool RegionField::valid() const { return pr_.get_logical_region() != LogicalRegion::NO_REGION; } +bool RegionField::valid() const +{ + return pr_.get_logical_region() != Legion::LogicalRegion::NO_REGION; +} Domain RegionField::domain() const { return dim_dispatch(dim_, get_domain_fn{}, pr_); } -OutputRegionField::OutputRegionField(const OutputRegion& out, FieldID fid) +OutputRegionField::OutputRegionField(const Legion::OutputRegion& out, Legion::FieldID fid) : out_(out), fid_(fid), - num_elements_(UntypedDeferredValue(sizeof(size_t), find_memory_kind_for_executing_processor())) + num_elements_( + Legion::UntypedDeferredValue(sizeof(size_t), find_memory_kind_for_executing_processor())) { } @@ -75,9 +77,9 @@ OutputRegionField::OutputRegionField(OutputRegionField&& other) noexcept : bound_(other.bound_), out_(other.out_), fid_(other.fid_), num_elements_(other.num_elements_) { other.bound_ = false; - other.out_ = OutputRegion(); + other.out_ = Legion::OutputRegion(); other.fid_ = -1; - other.num_elements_ = UntypedDeferredValue(); + other.num_elements_ = Legion::UntypedDeferredValue(); } OutputRegionField& OutputRegionField::operator=(OutputRegionField&& other) noexcept @@ -88,9 +90,9 @@ OutputRegionField& OutputRegionField::operator=(OutputRegionField&& other) noexc num_elements_ = other.num_elements_; other.bound_ = false; - other.out_ = OutputRegion(); + other.out_ = Legion::OutputRegion(); other.fid_ = -1; - other.num_elements_ = UntypedDeferredValue(); + other.num_elements_ = Legion::UntypedDeferredValue(); return *this; } @@ -125,8 +127,11 @@ void OutputRegionField::update_num_elements(size_t num_elements) acc[0] = num_elements; } -FutureWrapper::FutureWrapper( - bool read_only, int32_t field_size, Domain domain, Future future, bool initialize /*= false*/) +FutureWrapper::FutureWrapper(bool read_only, + int32_t field_size, + Domain domain, + Legion::Future future, + bool initialize /*= false*/) : read_only_(read_only), field_size_(field_size), domain_(domain), future_(future) { #ifdef DEBUG_LEGATE @@ -148,16 +153,16 @@ FutureWrapper::FutureWrapper( #ifdef LEGATE_USE_CUDA if (mem_kind == Memory::Kind::GPU_FB_MEM) { // TODO: This should be done by Legion - buffer_ = UntypedDeferredValue(field_size, mem_kind); + buffer_ = Legion::UntypedDeferredValue(field_size, mem_kind); AccessorWO acc(buffer_, field_size, false); auto stream = cuda::StreamPool::get_stream_pool().get_stream(); CHECK_CUDA( cudaMemcpyAsync(acc.ptr(0), p_init_value, field_size, cudaMemcpyDeviceToDevice, stream)); } else #endif - buffer_ = UntypedDeferredValue(field_size, mem_kind, p_init_value); + buffer_ = Legion::UntypedDeferredValue(field_size, mem_kind, p_init_value); } else - buffer_ = UntypedDeferredValue(field_size, mem_kind); + buffer_ = Legion::UntypedDeferredValue(field_size, mem_kind); } } @@ -187,7 +192,7 @@ void FutureWrapper::initialize_with_identity(int32_t redop_id) auto untyped_acc = AccessorWO(buffer_, field_size_); auto ptr = untyped_acc.ptr(0); - auto redop = Runtime::get_reduction_op(redop_id); + auto redop = Legion::Runtime::get_reduction_op(redop_id); #ifdef DEBUG_LEGATE assert(redop->sizeof_lhs == field_size_); #endif diff --git a/src/core/data/store.h b/src/core/data/store.h index f21c820fc..0ae7231bc 100644 --- a/src/core/data/store.h +++ b/src/core/data/store.h @@ -59,7 +59,7 @@ class RegionField { ACC operator()(const Legion::PhysicalRegion& pr, Legion::FieldID fid, const Legion::AffineTransform& transform, - const Legion::Rect& bounds) + const Rect& bounds) { return ACC(pr, fid, transform, bounds); } @@ -76,7 +76,7 @@ class RegionField { Legion::FieldID fid, int32_t redop_id, const Legion::AffineTransform& transform, - const Legion::Rect& bounds) + const Rect& bounds) { return ACC(pr, fid, redop_id, transform, bounds); } @@ -84,9 +84,9 @@ class RegionField { struct get_domain_fn { template - Legion::Domain operator()(const Legion::PhysicalRegion& pr) + Domain operator()(const Legion::PhysicalRegion& pr) { - return Legion::Domain(pr.get_bounds()); + return Domain(pr.get_bounds()); } }; @@ -113,35 +113,34 @@ class RegionField { public: template - AccessorRO read_accessor(const Legion::Rect& bounds) const; + AccessorRO read_accessor(const Rect& bounds) const; template - AccessorWO write_accessor(const Legion::Rect& bounds) const; + AccessorWO write_accessor(const Rect& bounds) const; template - AccessorRW read_write_accessor(const Legion::Rect& bounds) const; + AccessorRW read_write_accessor(const Rect& bounds) const; template - AccessorRD reduce_accessor(int32_t redop_id, - const Legion::Rect& bounds) const; + AccessorRD reduce_accessor(int32_t redop_id, const Rect& bounds) const; public: template - AccessorRO read_accessor(const Legion::Rect& bounds, + AccessorRO read_accessor(const Rect& bounds, const Legion::DomainAffineTransform& transform) const; template - AccessorWO write_accessor(const Legion::Rect& bounds, + AccessorWO write_accessor(const Rect& bounds, const Legion::DomainAffineTransform& transform) const; template - AccessorRW read_write_accessor(const Legion::Rect& bounds, + AccessorRW read_write_accessor(const Rect& bounds, const Legion::DomainAffineTransform& transform) const; template AccessorRD reduce_accessor( int32_t redop_id, - const Legion::Rect& bounds, + const Rect& bounds, const Legion::DomainAffineTransform& transform) const; public: template - Legion::Rect shape() const; - Legion::Domain domain() const; + Rect shape() const; + Domain domain() const; public: bool is_readable() const { return readable_; } @@ -177,11 +176,11 @@ class OutputRegionField { public: template - Buffer create_output_buffer(const Legion::Point& extents, bool return_buffer); + Buffer create_output_buffer(const Point& extents, bool return_buffer); public: template - void return_data(Buffer& buffer, const Legion::Point& extents); + void return_data(Buffer& buffer, const Point& extents); void make_empty(int32_t dim); public: @@ -202,7 +201,7 @@ class FutureWrapper { FutureWrapper() {} FutureWrapper(bool read_only, int32_t field_size, - Legion::Domain domain, + Domain domain, Legion::Future future, bool initialize = false); @@ -225,14 +224,13 @@ class FutureWrapper { public: template - AccessorRO read_accessor(const Legion::Rect& bounds) const; + AccessorRO read_accessor(const Rect& bounds) const; template - AccessorWO write_accessor(const Legion::Rect& bounds) const; + AccessorWO write_accessor(const Rect& bounds) const; template - AccessorRW read_write_accessor(const Legion::Rect& bounds) const; + AccessorRW read_write_accessor(const Rect& bounds) const; template - AccessorRD reduce_accessor(int32_t redop_id, - const Legion::Rect& bounds) const; + AccessorRD reduce_accessor(int32_t redop_id, const Rect& bounds) const; public: template @@ -240,8 +238,8 @@ class FutureWrapper { public: template - Legion::Rect shape() const; - Legion::Domain domain() const; + Rect shape() const; + Domain domain() const; public: void initialize_with_identity(int32_t redop_id); @@ -252,7 +250,7 @@ class FutureWrapper { private: bool read_only_{true}; size_t field_size_{0}; - Legion::Domain domain_{}; + Domain domain_{}; Legion::Future future_{}; Legion::UntypedDeferredValue buffer_{}; }; @@ -307,23 +305,22 @@ class Store { public: template - AccessorRO read_accessor(const Legion::Rect& bounds) const; + AccessorRO read_accessor(const Rect& bounds) const; template - AccessorWO write_accessor(const Legion::Rect& bounds) const; + AccessorWO write_accessor(const Rect& bounds) const; template - AccessorRW read_write_accessor(const Legion::Rect& bounds) const; + AccessorRW read_write_accessor(const Rect& bounds) const; template - AccessorRD reduce_accessor(const Legion::Rect& bounds) const; + AccessorRD reduce_accessor(const Rect& bounds) const; public: template - Buffer create_output_buffer(const Legion::Point& extents, - bool return_buffer = false); + Buffer create_output_buffer(const Point& extents, bool return_buffer = false); public: template - Legion::Rect shape() const; - Legion::Domain domain() const; + Rect shape() const; + Domain domain() const; public: bool is_readable() const { return readable_; } @@ -336,7 +333,7 @@ class Store { public: template - void return_data(Buffer& buffer, const Legion::Point& extents); + void return_data(Buffer& buffer, const Point& extents); void make_empty(); public: diff --git a/src/core/data/store.inl b/src/core/data/store.inl index 7dc1d38db..d3cd6e594 100644 --- a/src/core/data/store.inl +++ b/src/core/data/store.inl @@ -72,32 +72,32 @@ AccessorRD RegionField::reduce_accessor( } template -AccessorRO RegionField::read_accessor(const Legion::Rect& bounds) const +AccessorRO RegionField::read_accessor(const Rect& bounds) const { return AccessorRO(pr_, fid_, bounds); } template -AccessorWO RegionField::write_accessor(const Legion::Rect& bounds) const +AccessorWO RegionField::write_accessor(const Rect& bounds) const { return AccessorWO(pr_, fid_, bounds); } template -AccessorRW RegionField::read_write_accessor(const Legion::Rect& bounds) const +AccessorRW RegionField::read_write_accessor(const Rect& bounds) const { return AccessorRW(pr_, fid_, bounds); } template AccessorRD RegionField::reduce_accessor(int32_t redop_id, - const Legion::Rect& bounds) const + const Rect& bounds) const { return AccessorRD(pr_, fid_, redop_id, bounds); } template -AccessorRO RegionField::read_accessor(const Legion::Rect& bounds, +AccessorRO RegionField::read_accessor(const Rect& bounds, const Legion::DomainAffineTransform& transform) const { using ACC = AccessorRO; @@ -106,7 +106,7 @@ AccessorRO RegionField::read_accessor(const Legion::Rect& bounds, } template -AccessorWO RegionField::write_accessor(const Legion::Rect& bounds, +AccessorWO RegionField::write_accessor(const Rect& bounds, const Legion::DomainAffineTransform& transform) const { using ACC = AccessorWO; @@ -116,7 +116,7 @@ AccessorWO RegionField::write_accessor(const Legion::Rect& bounds, template AccessorRW RegionField::read_write_accessor( - const Legion::Rect& bounds, const Legion::DomainAffineTransform& transform) const + const Rect& bounds, const Legion::DomainAffineTransform& transform) const { using ACC = AccessorRW; return dim_dispatch( @@ -125,9 +125,7 @@ AccessorRW RegionField::read_write_accessor( template AccessorRD RegionField::reduce_accessor( - int32_t redop_id, - const Legion::Rect& bounds, - const Legion::DomainAffineTransform& transform) const + int32_t redop_id, const Rect& bounds, const Legion::DomainAffineTransform& transform) const { using ACC = AccessorRD; return dim_dispatch( @@ -135,9 +133,9 @@ AccessorRD RegionField::reduce_accessor( } template -Legion::Rect RegionField::shape() const +Rect RegionField::shape() const { - return Legion::Rect(pr_); + return Rect(pr_); } template @@ -147,7 +145,7 @@ AccessorRO FutureWrapper::read_accessor() const assert(sizeof(T) == field_size_); #endif if (read_only_) { - auto memkind = Legion::Memory::Kind::NO_MEMKIND; + auto memkind = Memory::Kind::NO_MEMKIND; return AccessorRO(future_, memkind); } else return AccessorRO(buffer_); @@ -184,20 +182,20 @@ AccessorRD FutureWrapper::reduce_accessor(int32_t redop_id) } template -AccessorRO FutureWrapper::read_accessor(const Legion::Rect& bounds) const +AccessorRO FutureWrapper::read_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE assert(sizeof(T) == field_size_); #endif if (read_only_) { - auto memkind = Legion::Memory::Kind::NO_MEMKIND; + auto memkind = Memory::Kind::NO_MEMKIND; return AccessorRO(future_, bounds, memkind); } else return AccessorRO(buffer_, bounds); } template -AccessorWO FutureWrapper::write_accessor(const Legion::Rect& bounds) const +AccessorWO FutureWrapper::write_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE assert(sizeof(T) == field_size_); @@ -207,7 +205,7 @@ AccessorWO FutureWrapper::write_accessor(const Legion::Rect& bounds } template -AccessorRW FutureWrapper::read_write_accessor(const Legion::Rect& bounds) const +AccessorRW FutureWrapper::read_write_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE assert(sizeof(T) == field_size_); @@ -218,7 +216,7 @@ AccessorRW FutureWrapper::read_write_accessor(const Legion::Rect& b template AccessorRD FutureWrapper::reduce_accessor(int32_t redop_id, - const Legion::Rect& bounds) const + const Rect& bounds) const { #ifdef DEBUG_LEGATE assert(sizeof(typename OP::LHS) == field_size_); @@ -228,9 +226,9 @@ AccessorRD FutureWrapper::reduce_accessor(int32_t redop_id, } template -Legion::Rect FutureWrapper::shape() const +Rect FutureWrapper::shape() const { - return Legion::Rect(domain()); + return Rect(domain()); } template @@ -246,7 +244,7 @@ VAL FutureWrapper::scalar() const } template -Buffer OutputRegionField::create_output_buffer(const Legion::Point& extents, +Buffer OutputRegionField::create_output_buffer(const Point& extents, bool return_buffer) { if (return_buffer) { @@ -261,7 +259,7 @@ Buffer OutputRegionField::create_output_buffer(const Legion::Point& } template -void OutputRegionField::return_data(Buffer& buffer, const Legion::Point& extents) +void OutputRegionField::return_data(Buffer& buffer, const Point& extents) { #ifdef DEBUG_LEGATE assert(!bound_); @@ -337,7 +335,7 @@ AccessorRD Store::reduce_accessor() const } template -AccessorRO Store::read_accessor(const Legion::Rect& bounds) const +AccessorRO Store::read_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE check_accessor_dimension(DIM); @@ -353,7 +351,7 @@ AccessorRO Store::read_accessor(const Legion::Rect& bounds) const } template -AccessorWO Store::write_accessor(const Legion::Rect& bounds) const +AccessorWO Store::write_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE check_accessor_dimension(DIM); @@ -369,7 +367,7 @@ AccessorWO Store::write_accessor(const Legion::Rect& bounds) const } template -AccessorRW Store::read_write_accessor(const Legion::Rect& bounds) const +AccessorRW Store::read_write_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE check_accessor_dimension(DIM); @@ -385,7 +383,7 @@ AccessorRW Store::read_write_accessor(const Legion::Rect& bounds) c } template -AccessorRD Store::reduce_accessor(const Legion::Rect& bounds) const +AccessorRD Store::reduce_accessor(const Rect& bounds) const { #ifdef DEBUG_LEGATE check_accessor_dimension(DIM); @@ -401,7 +399,7 @@ AccessorRD Store::reduce_accessor(const Legion::Rect& b } template -Buffer Store::create_output_buffer(const Legion::Point& extents, +Buffer Store::create_output_buffer(const Point& extents, bool return_buffer /*= false*/) { #ifdef DEBUG_LEGATE @@ -412,7 +410,7 @@ Buffer Store::create_output_buffer(const Legion::Point& extents, } template -Legion::Rect Store::shape() const +Rect Store::shape() const { #ifdef DEBUG_LEGATE if (!(DIM == dim_ || (dim_ == 0 && DIM == 1))) { @@ -426,8 +424,8 @@ Legion::Rect Store::shape() const if (dom.dim > 0) return dom.bounds(); else { - auto p = Legion::Point::ZEROES(); - return Legion::Rect(p, p); + auto p = Point::ZEROES(); + return Rect(p, p); } } @@ -441,7 +439,7 @@ VAL Store::scalar() const } template -void Store::return_data(Buffer& buffer, const Legion::Point& extents) +void Store::return_data(Buffer& buffer, const Point& extents) { #ifdef DEBUG_LEGATE check_valid_return(); diff --git a/src/core/data/transform.cc b/src/core/data/transform.cc index 07ab5f7ef..93d8e6956 100644 --- a/src/core/data/transform.cc +++ b/src/core/data/transform.cc @@ -18,11 +18,10 @@ namespace legate { -using namespace Legion; - -DomainAffineTransform combine(const DomainAffineTransform& lhs, const DomainAffineTransform& rhs) +Legion::DomainAffineTransform combine(const Legion::DomainAffineTransform& lhs, + const Legion::DomainAffineTransform& rhs) { - DomainAffineTransform result; + Legion::DomainAffineTransform result; auto transform = lhs.transform * rhs.transform; auto offset = lhs.transform * rhs.offset + lhs.offset; result.transform = transform; @@ -37,7 +36,7 @@ TransformStack::TransformStack(std::unique_ptr&& transform, { } -Legion::Domain TransformStack::transform(const Legion::Domain& input) const +Domain TransformStack::transform(const Domain& input) const { #ifdef DEBUG_LEGATE assert(transform_ != nullptr); @@ -100,12 +99,12 @@ Domain Shift::transform(const Domain& input) const return result; } -DomainAffineTransform Shift::inverse_transform(int32_t in_dim) const +Legion::DomainAffineTransform Shift::inverse_transform(int32_t in_dim) const { assert(dim_ < in_dim); auto out_dim = in_dim; - DomainTransform transform; + Legion::DomainTransform transform; transform.m = out_dim; transform.n = in_dim; for (int32_t i = 0; i < out_dim; ++i) @@ -116,7 +115,7 @@ DomainAffineTransform Shift::inverse_transform(int32_t in_dim) const offset.dim = out_dim; for (int32_t i = 0; i < out_dim; ++i) offset[i] = i == dim_ ? -offset_ : 0; - DomainAffineTransform result; + Legion::DomainAffineTransform result; result.transform = transform; result.offset = offset; return result; @@ -152,12 +151,12 @@ Domain Promote::transform(const Domain& input) const return output; } -DomainAffineTransform Promote::inverse_transform(int32_t in_dim) const +Legion::DomainAffineTransform Promote::inverse_transform(int32_t in_dim) const { assert(extra_dim_ < in_dim); auto out_dim = in_dim - 1; - DomainTransform transform; + Legion::DomainTransform transform; transform.m = std::max(out_dim, 1); transform.n = in_dim; for (int32_t i = 0; i < transform.m; ++i) @@ -171,7 +170,7 @@ DomainAffineTransform Promote::inverse_transform(int32_t in_dim) const offset.dim = std::max(out_dim, 1); for (int32_t i = 0; i < transform.m; ++i) offset[i] = 0; - DomainAffineTransform result; + Legion::DomainAffineTransform result; result.transform = transform; result.offset = offset; return result; @@ -202,12 +201,12 @@ Domain Project::transform(const Domain& input) const return output; } -DomainAffineTransform Project::inverse_transform(int32_t in_dim) const +Legion::DomainAffineTransform Project::inverse_transform(int32_t in_dim) const { auto out_dim = in_dim + 1; assert(dim_ < out_dim); - DomainTransform transform; + Legion::DomainTransform transform; transform.m = out_dim; if (in_dim == 0) { transform.n = out_dim; @@ -225,7 +224,7 @@ DomainAffineTransform Project::inverse_transform(int32_t in_dim) const offset.dim = out_dim; for (int32_t i = 0; i < out_dim; ++i) offset[i] = i == dim_ ? coord_ : 0; - DomainAffineTransform result; + Legion::DomainAffineTransform result; result.transform = transform; result.offset = offset; return result; @@ -254,9 +253,9 @@ Domain Transpose::transform(const Domain& input) const return output; } -DomainAffineTransform Transpose::inverse_transform(int32_t in_dim) const +Legion::DomainAffineTransform Transpose::inverse_transform(int32_t in_dim) const { - DomainTransform transform; + Legion::DomainTransform transform; transform.m = in_dim; transform.n = in_dim; for (int32_t i = 0; i < in_dim; ++i) @@ -268,7 +267,7 @@ DomainAffineTransform Transpose::inverse_transform(int32_t in_dim) const offset.dim = in_dim; for (int32_t i = 0; i < in_dim; ++i) offset[i] = 0; - DomainAffineTransform result; + Legion::DomainAffineTransform result; result.transform = transform; result.offset = offset; return result; @@ -338,9 +337,9 @@ Domain Delinearize::transform(const Domain& input) const return delinearize(dim_, sizes_.size(), strides_, input); } -DomainAffineTransform Delinearize::inverse_transform(int32_t in_dim) const +Legion::DomainAffineTransform Delinearize::inverse_transform(int32_t in_dim) const { - DomainTransform transform; + Legion::DomainTransform transform; int32_t out_dim = in_dim - strides_.size() + 1; transform.m = out_dim; transform.n = in_dim; @@ -357,7 +356,7 @@ DomainAffineTransform Delinearize::inverse_transform(int32_t in_dim) const offset.dim = out_dim; for (int32_t i = 0; i < out_dim; ++i) offset[i] = 0; - DomainAffineTransform result; + Legion::DomainAffineTransform result; result.transform = transform; result.offset = offset; return result; diff --git a/src/core/data/transform.h b/src/core/data/transform.h index fbdd2e4a8..a925b6bf0 100644 --- a/src/core/data/transform.h +++ b/src/core/data/transform.h @@ -18,12 +18,12 @@ #include -#include "legion.h" +#include "core/utilities/typedefs.h" namespace legate { struct Transform { - virtual Legion::Domain transform(const Legion::Domain& input) const = 0; + virtual Domain transform(const Domain& input) const = 0; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const = 0; virtual void print(std::ostream& out) const = 0; }; @@ -40,7 +40,7 @@ struct TransformStack : public Transform { std::shared_ptr&& parent); public: - virtual Legion::Domain transform(const Legion::Domain& input) const override; + virtual Domain transform(const Domain& input) const override; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const override; virtual void print(std::ostream& out) const override; @@ -61,7 +61,7 @@ class Shift : public StoreTransform { Shift(int32_t dim, int64_t offset); public: - virtual Legion::Domain transform(const Legion::Domain& input) const override; + virtual Domain transform(const Domain& input) const override; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const override; virtual void print(std::ostream& out) const override; @@ -78,7 +78,7 @@ class Promote : public StoreTransform { Promote(int32_t extra_dim, int64_t dim_size); public: - virtual Legion::Domain transform(const Legion::Domain& input) const override; + virtual Domain transform(const Domain& input) const override; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const override; virtual void print(std::ostream& out) const override; @@ -96,7 +96,7 @@ class Project : public StoreTransform { virtual ~Project() {} public: - virtual Legion::Domain transform(const Legion::Domain& domain) const override; + virtual Domain transform(const Domain& domain) const override; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const override; virtual void print(std::ostream& out) const override; @@ -113,7 +113,7 @@ class Transpose : public StoreTransform { Transpose(std::vector&& axes); public: - virtual Legion::Domain transform(const Legion::Domain& domain) const override; + virtual Domain transform(const Domain& domain) const override; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const override; virtual void print(std::ostream& out) const override; @@ -129,7 +129,7 @@ class Delinearize : public StoreTransform { Delinearize(int32_t dim, std::vector&& sizes); public: - virtual Legion::Domain transform(const Legion::Domain& domain) const override; + virtual Domain transform(const Domain& domain) const override; virtual Legion::DomainAffineTransform inverse_transform(int32_t in_dim) const override; virtual void print(std::ostream& out) const override; diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index b692ee9c0..85a2c7427 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -30,12 +30,6 @@ #include "core/utilities/linearize.h" #include "legate_defines.h" -using LegionTask = Legion::Task; -using LegionCopy = Legion::Copy; - -using namespace Legion; -using namespace Legion::Mapping; - namespace legate { namespace mapping { @@ -54,9 +48,9 @@ const std::vector& default_store_targets(Processor::Kind kind) return finder->second; } -std::string log_mappable(const Mappable& mappable, bool prefix_only = false) +std::string log_mappable(const Legion::Mappable& mappable, bool prefix_only = false) { - static const std::map prefixes = { + static const std::map prefixes = { {LEGION_TASK_MAPPABLE, "Task "}, {LEGION_COPY_MAPPABLE, "Copy "}, {LEGION_INLINE_MAPPABLE, "Inline mapping "}, @@ -75,20 +69,24 @@ std::string log_mappable(const Mappable& mappable, bool prefix_only = false) } // namespace -BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) +BaseMapper::BaseMapper(std::unique_ptr legate_mapper, + Legion::Runtime* rt, + Legion::Machine m, + const LibraryContext& ctx) : Mapper(rt->get_mapper_runtime()), + legate_mapper_(std::move(legate_mapper)), legion_runtime(rt), machine(m), context(ctx), local_node(get_local_node()), - total_nodes(get_total_nodes(m)), + total_nodes_(get_total_nodes(m)), mapper_name(std::move(create_name(local_node))), logger(create_logger_name().c_str()), local_instances(InstanceManager::get_instance_manager()), reduction_instances(ReductionInstanceManager::get_instance_manager()) { // Query to find all our local processors - Machine::ProcessorQuery local_procs(machine); + Legion::Machine::ProcessorQuery local_procs(machine); local_procs.local_address_space(); for (auto local_proc : local_procs) { switch (local_proc.kind()) { @@ -108,20 +106,20 @@ BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) } } // Now do queries to find all our local memories - Machine::MemoryQuery local_sysmem(machine); + Legion::Machine::MemoryQuery local_sysmem(machine); local_sysmem.local_address_space(); local_sysmem.only_kind(Memory::SYSTEM_MEM); assert(local_sysmem.count() > 0); local_system_memory = local_sysmem.first(); if (!local_gpus.empty()) { - Machine::MemoryQuery local_zcmem(machine); + Legion::Machine::MemoryQuery local_zcmem(machine); local_zcmem.local_address_space(); local_zcmem.only_kind(Memory::Z_COPY_MEM); assert(local_zcmem.count() > 0); local_zerocopy_memory = local_zcmem.first(); } for (auto& local_gpu : local_gpus) { - Machine::MemoryQuery local_framebuffer(machine); + Legion::Machine::MemoryQuery local_framebuffer(machine); local_framebuffer.local_address_space(); local_framebuffer.only_kind(Memory::GPU_FB_MEM); local_framebuffer.best_affinity_to(local_gpu); @@ -129,7 +127,7 @@ BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) local_frame_buffers[local_gpu] = local_framebuffer.first(); } for (auto& local_omp : local_omps) { - Machine::MemoryQuery local_numa(machine); + Legion::Machine::MemoryQuery local_numa(machine); local_numa.local_address_space(); local_numa.only_kind(Memory::SOCKET_MEM); local_numa.best_affinity_to(local_omp); @@ -139,9 +137,11 @@ BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) local_numa_domains[local_omp] = local_system_memory; } generate_prime_factors(); + + legate_mapper_->set_machine(this); } -BaseMapper::~BaseMapper(void) +BaseMapper::~BaseMapper() { // Compute the size of all our remaining instances in each memory const char* show_usage = getenv("LEGATE_SHOW_USAGE"); @@ -168,22 +168,22 @@ BaseMapper::~BaseMapper(void) } } -/*static*/ AddressSpace BaseMapper::get_local_node(void) +/*static*/ Legion::AddressSpace BaseMapper::get_local_node() { Processor p = Processor::get_executing_processor(); return p.address_space(); } -/*static*/ size_t BaseMapper::get_total_nodes(Machine m) +/*static*/ size_t BaseMapper::get_total_nodes(Legion::Machine m) { - Machine::ProcessorQuery query(m); + Legion::Machine::ProcessorQuery query(m); query.only_kind(Processor::LOC_PROC); - std::set spaces; + std::set spaces; for (auto proc : query) spaces.insert(proc.address_space()); return spaces.size(); } -std::string BaseMapper::create_name(AddressSpace node) const +std::string BaseMapper::create_name(Legion::AddressSpace node) const { std::stringstream ss; ss << context.get_library_name() << " on Node " << node; @@ -197,15 +197,15 @@ std::string BaseMapper::create_logger_name() const return ss.str(); } -const char* BaseMapper::get_mapper_name(void) const { return mapper_name.c_str(); } +const char* BaseMapper::get_mapper_name() const { return mapper_name.c_str(); } -Mapper::MapperSyncModel BaseMapper::get_mapper_sync_model(void) const +Legion::Mapping::Mapper::MapperSyncModel BaseMapper::get_mapper_sync_model() const { return SERIALIZED_REENTRANT_MAPPER_MODEL; } -void BaseMapper::select_task_options(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::select_task_options(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, TaskOptions& output) { #ifdef LEGATE_USE_COLLECTIVE @@ -232,27 +232,27 @@ void BaseMapper::select_task_options(const MapperContext ctx, options.push_back(TaskTarget::CPU); Task legate_task(&task, context, runtime, ctx); - auto target = task_target(legate_task, options); + auto target = legate_mapper_->task_target(legate_task, options); dispatch(target, [&output](auto& procs) { output.initial_proc = procs.front(); }); // We never want valid instances output.valid_instances = false; } -void BaseMapper::premap_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::premap_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const PremapTaskInput& input, PremapTaskOutput& output) { // NO-op since we know that all our futures should be mapped in the system memory } -void BaseMapper::slice_auto_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::slice_auto_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SliceTaskInput& input, SliceTaskOutput& output) { - ProjectionID projection = 0; + Legion::ProjectionID projection = 0; for (auto& req : task.regions) if (req.tag == LEGATE_CORE_KEY_STORE_TAG) { projection = req.projection; @@ -310,8 +310,7 @@ void BaseMapper::generate_prime_factors() if (local_cpus.size() > 0) generate_prime_factor(local_cpus, Processor::LOC_PROC); } -const std::vector BaseMapper::get_processor_grid(Legion::Processor::Kind kind, - int32_t ndim) +const std::vector BaseMapper::get_processor_grid(Processor::Kind kind, int32_t ndim) { auto key = std::make_pair(kind, ndim); auto finder = proc_grids.find(key); @@ -337,8 +336,8 @@ const std::vector BaseMapper::get_processor_grid(Legion::Processor::Kin return pitches; } -void BaseMapper::slice_manual_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::slice_manual_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SliceTaskInput& input, SliceTaskOutput& output) { @@ -358,8 +357,8 @@ void BaseMapper::slice_manual_task(const MapperContext ctx, dispatch(task.target_proc.kind(), distribute); } -void BaseMapper::slice_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::slice_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SliceTaskInput& input, SliceTaskOutput& output) { @@ -369,23 +368,25 @@ void BaseMapper::slice_task(const MapperContext ctx, slice_auto_task(ctx, task, input, output); } -bool BaseMapper::has_variant(const MapperContext ctx, const LegionTask& task, Processor::Kind kind) +bool BaseMapper::has_variant(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, + Processor::Kind kind) { return find_variant(ctx, task, kind).has_value(); } -std::optional BaseMapper::find_variant(const MapperContext ctx, - const LegionTask& task, - Processor::Kind kind) +std::optional BaseMapper::find_variant(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, + Processor::Kind kind) { const VariantCacheKey key(task.task_id, kind); auto finder = variants.find(key); if (finder != variants.end()) return finder->second; // Haven't seen it before so let's look it up to make sure it exists - std::vector avail_variants; + std::vector avail_variants; runtime->find_valid_variants(ctx, key.first, avail_variants, key.second); - std::optional result; + std::optional result; for (auto vid : avail_variants) { #ifdef DEBUG_LEGATE assert(vid > 0); @@ -404,13 +405,14 @@ std::optional BaseMapper::find_variant(const MapperContext ctx, return result; } -void BaseMapper::map_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::map_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const MapTaskInput& input, MapTaskOutput& output) { #ifdef DEBUG_LEGATE - logger.debug() << "Entering map_task for " << Utilities::to_string(runtime, ctx, task); + logger.debug() << "Entering map_task for " + << Legion::Mapping::Utilities::to_string(runtime, ctx, task); #endif // Should never be mapping the top-level task here @@ -429,7 +431,7 @@ void BaseMapper::map_task(const MapperContext ctx, const auto& options = default_store_targets(task.target_proc.kind()); - auto mappings = store_mappings(legate_task, options); + auto mappings = legate_mapper_->store_mappings(legate_task, options); auto validate_colocation = [this](const auto& mapping) { if (mapping.stores.empty()) { @@ -533,27 +535,28 @@ void BaseMapper::map_task(const MapperContext ctx, output.output_targets[req_idx] = get_target_memory(task.target_proc, mapping.policy.target); auto ndim = mapping.store().dim(); // FIXME: Unbound stores can have more than one dimension later - std::vector dimension_ordering; + std::vector dimension_ordering; for (int32_t dim = ndim - 1; dim >= 0; --dim) - dimension_ordering.push_back( - static_cast(static_cast(DimensionKind::LEGION_DIM_X) + dim)); - dimension_ordering.push_back(DimensionKind::LEGION_DIM_F); + dimension_ordering.push_back(static_cast( + static_cast(Legion::DimensionKind::LEGION_DIM_X) + dim)); + dimension_ordering.push_back(Legion::DimensionKind::LEGION_DIM_F); output.output_constraints[req_idx].ordering_constraint = - OrderingConstraint(dimension_ordering, false); + Legion::OrderingConstraint(dimension_ordering, false); } }; map_unbound_stores(for_unbound_stores); output.chosen_instances.resize(task.regions.size()); - std::map*> output_map; + std::map*> + output_map; for (uint32_t idx = 0; idx < task.regions.size(); ++idx) output_map[&task.regions[idx]] = &output.chosen_instances[idx]; map_legate_stores(ctx, task, for_stores, task.target_proc, output_map); } -void BaseMapper::map_replicate_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::map_replicate_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const MapTaskInput& input, const MapTaskOutput& def_output, MapReplicateTaskOutput& output) @@ -574,18 +577,18 @@ Memory BaseMapper::get_target_memory(Processor proc, StoreTarget target) return Memory::NO_MEMORY; } -void BaseMapper::map_legate_stores(const MapperContext ctx, - const Mappable& mappable, +void BaseMapper::map_legate_stores(const Legion::Mapping::MapperContext ctx, + const Legion::Mappable& mappable, std::vector& mappings, Processor target_proc, OutputMap& output_map) { auto try_mapping = [&](bool can_fail) { - const PhysicalInstance NO_INST{}; - std::vector instances; + const Legion::Mapping::PhysicalInstance NO_INST{}; + std::vector instances; for (auto& mapping : mappings) { - PhysicalInstance result = NO_INST; - auto reqs = mapping.requirements(); + Legion::Mapping::PhysicalInstance result = NO_INST; + auto reqs = mapping.requirements(); while (map_legate_store(ctx, mappable, mapping, reqs, target_proc, result, can_fail)) { if (NO_INST == result) { #ifdef DEBUG_LEGATE @@ -610,10 +613,10 @@ void BaseMapper::map_legate_stores(const MapperContext ctx, << " for reqs:" << reqs_ss.str(); #endif if ((*reqs.begin())->redop != 0) { - AutoLock lock(ctx, reduction_instances->manager_lock()); + Legion::Mapping::AutoLock lock(ctx, reduction_instances->manager_lock()); reduction_instances->erase(result); } else { - AutoLock lock(ctx, local_instances->manager_lock()); + Legion::Mapping::AutoLock lock(ctx, local_instances->manager_lock()); local_instances->erase(result); } result = NO_INST; @@ -649,14 +652,14 @@ void BaseMapper::map_legate_stores(const MapperContext ctx, } } -void BaseMapper::tighten_write_policies(const Mappable& mappable, +void BaseMapper::tighten_write_policies(const Legion::Mappable& mappable, std::vector& mappings) { for (auto& mapping : mappings) { // If the policy is exact, there's nothing we can tighten if (mapping.policy.exact) continue; - PrivilegeMode priv = LEGION_NO_ACCESS; + int32_t priv = LEGION_NO_ACCESS; for (auto* req : mapping.requirements()) priv |= req->privilege; // We tighten only write requirements if (!(priv & LEGION_WRITE_PRIV)) continue; @@ -671,22 +674,22 @@ void BaseMapper::tighten_write_policies(const Mappable& mappable, } } -bool BaseMapper::map_legate_store(const MapperContext ctx, - const Mappable& mappable, +bool BaseMapper::map_legate_store(const Legion::Mapping::MapperContext ctx, + const Legion::Mappable& mappable, const StoreMapping& mapping, - const std::set& reqs, + const std::set& reqs, Processor target_proc, - PhysicalInstance& result, + Legion::Mapping::PhysicalInstance& result, bool can_fail) { if (reqs.empty()) return false; const auto& policy = mapping.policy; - std::vector regions; + std::vector regions; for (auto* req : reqs) regions.push_back(req->region); auto target_memory = get_target_memory(target_proc, policy.target); - ReductionOpID redop = (*reqs.begin())->redop; + auto redop = (*reqs.begin())->redop; #ifdef DEBUG_LEGATE for (auto* req : reqs) { if (redop != req->redop) { @@ -699,7 +702,7 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, #endif // Generate layout constraints from the store mapping - LayoutConstraintSet layout_constraints; + Legion::LayoutConstraintSet layout_constraints; mapping.populate_layout_constraints(layout_constraints); auto& fields = layout_constraints.field_constraint.field_set; @@ -707,7 +710,7 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, if (redop != 0) { // We need to hold the instance manager lock as we're about to try // to find an instance - AutoLock reduction_lock(ctx, reduction_instances->manager_lock()); + Legion::Mapping::AutoLock reduction_lock(ctx, reduction_instances->manager_lock()); // This whole process has to appear atomic runtime->disable_reentrant(ctx); @@ -730,7 +733,8 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, } // if we didn't find it, create one - layout_constraints.add_constraint(SpecializedConstraint(REDUCTION_FOLD_SPECIALIZE, redop)); + layout_constraints.add_constraint( + Legion::SpecializedConstraint(REDUCTION_FOLD_SPECIALIZE, redop)); size_t footprint = 0; if (runtime->create_physical_instance(ctx, target_memory, @@ -745,7 +749,7 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, Realm::LoggerMessage msg = logger.debug(); msg << "Operation " << mappable.get_unique_id() << ": created reduction instance " << result << " for"; - for (LogicalRegion r : regions) msg << " " << r; + for (auto& r : regions) msg << " " << r; msg << " (size: " << footprint << " bytes, memory: " << target_memory << ")"; #endif if (target_proc.kind() == Processor::TOC_PROC) { @@ -764,7 +768,7 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, return true; } - AutoLock lock(ctx, local_instances->manager_lock()); + Legion::Mapping::AutoLock lock(ctx, local_instances->manager_lock()); runtime->disable_reentrant(ctx); // See if we already have it in our local instances if (fields.size() == 1 && regions.size() == 1 && @@ -790,7 +794,7 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, // that instance for all the tasks for the different regions. // First we have to see if there is anything we overlap with auto fid = fields.front(); - const IndexSpace is = regions.front().get_index_space(); + auto is = regions.front().get_index_space(); const Domain domain = runtime->get_index_space_domain(ctx, is); group = local_instances->find_region_group(regions.front(), domain, fid, target_memory, policy.exact); @@ -864,10 +868,10 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, return true; } -void BaseMapper::report_failed_mapping(const Mappable& mappable, +void BaseMapper::report_failed_mapping(const Legion::Mappable& mappable, uint32_t index, Memory target_memory, - ReductionOpID redop) + Legion::ReductionOpID redop) { static const char* memory_kinds[] = { #define MEM_NAMES(name, desc) desc, @@ -876,7 +880,7 @@ void BaseMapper::report_failed_mapping(const Mappable& mappable, }; std::string opname = ""; - if (mappable.get_mappable_type() == Mappable::TASK_MAPPABLE) { + if (mappable.get_mappable_type() == Legion::Mappable::TASK_MAPPABLE) { const auto task = mappable.as_task(); opname = task->get_task_name(); } @@ -902,8 +906,8 @@ void BaseMapper::report_failed_mapping(const Mappable& mappable, LEGATE_ABORT; } -void BaseMapper::select_task_variant(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::select_task_variant(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectVariantInput& input, SelectVariantOutput& output) { @@ -914,8 +918,8 @@ void BaseMapper::select_task_variant(const MapperContext ctx, output.chosen_variant = *variant; } -void BaseMapper::postmap_task(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::postmap_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const PostMapInput& input, PostMapOutput& output) { @@ -923,8 +927,8 @@ void BaseMapper::postmap_task(const MapperContext ctx, LEGATE_ABORT; } -void BaseMapper::select_task_sources(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::select_task_sources(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectTaskSrcInput& input, SelectTaskSrcOutput& output) { @@ -932,17 +936,18 @@ void BaseMapper::select_task_sources(const MapperContext ctx, ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } -void add_instance_to_band_ranking(const PhysicalInstance& instance, - const Legion::AddressSpace& local_node, - std::map& source_memories, - std::vector>& band_ranking, - const Memory& destination_memory, - const Legion::Machine& machine) +void add_instance_to_band_ranking( + const Legion::Mapping::PhysicalInstance& instance, + const Legion::AddressSpace& local_node, + std::map& source_memories, + std::vector>& band_ranking, + const Memory& destination_memory, + const Legion::Machine& machine) { Memory location = instance.get_location(); auto finder = source_memories.find(location); if (finder == source_memories.end()) { - std::vector affinity; + std::vector affinity; machine.get_mem_mem_affinity( affinity, location, destination_memory, false /*not just local affinities*/); uint32_t memory_bandwidth = 0; @@ -953,16 +958,19 @@ void add_instance_to_band_ranking(const PhysicalInstance& instance, memory_bandwidth = affinity[0].bandwidth; } source_memories[location] = memory_bandwidth; - band_ranking.push_back(std::pair(instance, memory_bandwidth)); + band_ranking.push_back( + std::pair(instance, memory_bandwidth)); } else - band_ranking.push_back(std::pair(instance, finder->second)); + band_ranking.push_back( + std::pair(instance, finder->second)); } -void BaseMapper::legate_select_sources(const MapperContext ctx, - const PhysicalInstance& target, - const std::vector& sources, - const std::vector& collective_sources, - std::deque& ranking) +void BaseMapper::legate_select_sources( + const Legion::Mapping::MapperContext ctx, + const Legion::Mapping::PhysicalInstance& target, + const std::vector& sources, + const std::vector& collective_sources, + std::deque& ranking) { std::map source_memories; // For right now we'll rank instances by the bandwidth of the memory @@ -970,22 +978,22 @@ void BaseMapper::legate_select_sources(const MapperContext ctx, // TODO: consider layouts when ranking source to help out the DMA system Memory destination_memory = target.get_location(); // fill in a vector of the sources with their bandwidths and sort them - std::vector> band_ranking; + std::vector> band_ranking; for (uint32_t idx = 0; idx < sources.size(); idx++) { - const PhysicalInstance& instance = sources[idx]; + const Legion::Mapping::PhysicalInstance& instance = sources[idx]; add_instance_to_band_ranking( instance, local_node, source_memories, band_ranking, destination_memory, machine); } for (uint32_t idx = 0; idx < collective_sources.size(); idx++) { - std::vector col_instances; + std::vector col_instances; collective_sources[idx].find_instances_nearest_memory(destination_memory, col_instances); #ifdef DEBUG_LEGATE // there must exist at least one instance in the collective view assert(!col_instances.empty()); #endif // we need only first instance if there are several - const PhysicalInstance& instance = col_instances[0]; + const Legion::Mapping::PhysicalInstance& instance = col_instances[0]; add_instance_to_band_ranking( instance, local_node, source_memories, band_ranking, destination_memory, machine); } @@ -1004,25 +1012,25 @@ void BaseMapper::legate_select_sources(const MapperContext ctx, ranking.push_back(it->first); } -void BaseMapper::speculate(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::speculate(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, SpeculativeOutput& output) { output.speculate = false; } -void BaseMapper::report_profiling(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const TaskProfilingInfo& input) { // Shouldn't get any profiling feedback currently LEGATE_ABORT; } -ShardingID BaseMapper::find_sharding_functor_by_key_store_projection( - const std::vector& requirements) +Legion::ShardingID BaseMapper::find_sharding_functor_by_key_store_projection( + const std::vector& requirements) { - ProjectionID proj_id = 0; + Legion::ProjectionID proj_id = 0; for (auto& requirement : requirements) if (LEGATE_CORE_KEY_STORE_TAG == requirement.tag) { proj_id = requirement.projection; @@ -1031,8 +1039,8 @@ ShardingID BaseMapper::find_sharding_functor_by_key_store_projection( return find_sharding_functor_by_projection_functor(proj_id); } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { @@ -1041,8 +1049,8 @@ void BaseMapper::select_sharding_functor(const MapperContext ctx, : find_sharding_functor_by_projection_functor(0); } -void BaseMapper::map_inline(const MapperContext ctx, - const InlineMapping& inline_op, +void BaseMapper::map_inline(const Legion::Mapping::MapperContext ctx, + const Legion::InlineMapping& inline_op, const MapInlineInput& input, MapInlineOutput& output) { @@ -1062,14 +1070,15 @@ void BaseMapper::map_inline(const MapperContext ctx, std::vector mappings; mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); - std::map*> output_map; + std::map*> + output_map; for (auto* req : mappings.front().requirements()) output_map[req] = &output.chosen_instances; map_legate_stores(ctx, inline_op, mappings, target_proc, output_map); } -void BaseMapper::select_inline_sources(const MapperContext ctx, - const InlineMapping& inline_op, +void BaseMapper::select_inline_sources(const Legion::Mapping::MapperContext ctx, + const Legion::InlineMapping& inline_op, const SelectInlineSrcInput& input, SelectInlineSrcOutput& output) { @@ -1077,16 +1086,16 @@ void BaseMapper::select_inline_sources(const MapperContext ctx, ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } -void BaseMapper::report_profiling(const MapperContext ctx, - const InlineMapping& inline_op, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::InlineMapping& inline_op, const InlineProfilingInfo& input) { // No profiling yet for inline mappings LEGATE_ABORT; } -void BaseMapper::map_copy(const MapperContext ctx, - const LegionCopy& copy, +void BaseMapper::map_copy(const Legion::Mapping::MapperContext ctx, + const Legion::Copy& copy, const MapCopyInput& input, MapCopyOutput& output) { @@ -1128,7 +1137,8 @@ void BaseMapper::map_copy(const MapperContext ctx, Copy legate_copy(©, runtime, ctx); - std::map*> output_map; + std::map*> + output_map; auto add_to_output_map = [&output_map](auto& reqs, auto& instances) { instances.resize(reqs.size()); for (uint32_t idx = 0; idx < reqs.size(); ++idx) output_map[&reqs[idx]] = &instances[idx]; @@ -1165,8 +1175,8 @@ void BaseMapper::map_copy(const MapperContext ctx, map_legate_stores(ctx, copy, mappings, target_proc, output_map); } -void BaseMapper::select_copy_sources(const MapperContext ctx, - const LegionCopy& copy, +void BaseMapper::select_copy_sources(const Legion::Mapping::MapperContext ctx, + const Legion::Copy& copy, const SelectCopySrcInput& input, SelectCopySrcOutput& output) { @@ -1174,23 +1184,23 @@ void BaseMapper::select_copy_sources(const MapperContext ctx, ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } -void BaseMapper::speculate(const MapperContext ctx, - const LegionCopy& copy, +void BaseMapper::speculate(const Legion::Mapping::MapperContext ctx, + const Legion::Copy& copy, SpeculativeOutput& output) { output.speculate = false; } -void BaseMapper::report_profiling(const MapperContext ctx, - const LegionCopy& copy, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::Copy& copy, const CopyProfilingInfo& input) { // No profiling for copies yet LEGATE_ABORT; } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const LegionCopy& copy, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Copy& copy, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { @@ -1198,8 +1208,8 @@ void BaseMapper::select_sharding_functor(const MapperContext ctx, output.chosen_functor = find_sharding_functor_by_projection_functor(0); } -void BaseMapper::select_close_sources(const MapperContext ctx, - const Close& close, +void BaseMapper::select_close_sources(const Legion::Mapping::MapperContext ctx, + const Legion::Close& close, const SelectCloseSrcInput& input, SelectCloseSrcOutput& output) { @@ -1207,63 +1217,63 @@ void BaseMapper::select_close_sources(const MapperContext ctx, ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } -void BaseMapper::report_profiling(const MapperContext ctx, - const Close& close, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::Close& close, const CloseProfilingInfo& input) { // No profiling yet for legate LEGATE_ABORT; } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const Close& close, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Close& close, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { LEGATE_ABORT; } -void BaseMapper::map_acquire(const MapperContext ctx, - const Acquire& acquire, +void BaseMapper::map_acquire(const Legion::Mapping::MapperContext ctx, + const Legion::Acquire& acquire, const MapAcquireInput& input, MapAcquireOutput& output) { // Nothing to do } -void BaseMapper::speculate(const MapperContext ctx, - const Acquire& acquire, +void BaseMapper::speculate(const Legion::Mapping::MapperContext ctx, + const Legion::Acquire& acquire, SpeculativeOutput& output) { output.speculate = false; } -void BaseMapper::report_profiling(const MapperContext ctx, - const Acquire& acquire, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::Acquire& acquire, const AcquireProfilingInfo& input) { // No profiling for legate yet LEGATE_ABORT; } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const Acquire& acquire, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Acquire& acquire, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { LEGATE_ABORT; } -void BaseMapper::map_release(const MapperContext ctx, - const Release& release, +void BaseMapper::map_release(const Legion::Mapping::MapperContext ctx, + const Legion::Release& release, const MapReleaseInput& input, MapReleaseOutput& output) { // Nothing to do } -void BaseMapper::select_release_sources(const MapperContext ctx, - const Release& release, +void BaseMapper::select_release_sources(const Legion::Mapping::MapperContext ctx, + const Legion::Release& release, const SelectReleaseSrcInput& input, SelectReleaseSrcOutput& output) { @@ -1271,31 +1281,31 @@ void BaseMapper::select_release_sources(const MapperContext ctx, ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } -void BaseMapper::speculate(const MapperContext ctx, - const Release& release, +void BaseMapper::speculate(const Legion::Mapping::MapperContext ctx, + const Legion::Release& release, SpeculativeOutput& output) { output.speculate = false; } -void BaseMapper::report_profiling(const MapperContext ctx, - const Release& release, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::Release& release, const ReleaseProfilingInfo& input) { // No profiling for legate yet LEGATE_ABORT; } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const Release& release, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Release& release, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { LEGATE_ABORT; } -void BaseMapper::select_partition_projection(const MapperContext ctx, - const Partition& partition, +void BaseMapper::select_partition_projection(const Legion::Mapping::MapperContext ctx, + const Legion::Partition& partition, const SelectPartitionProjectionInput& input, SelectPartitionProjectionOutput& output) { @@ -1303,11 +1313,11 @@ void BaseMapper::select_partition_projection(const MapperContext ctx, if (!input.open_complete_partitions.empty()) output.chosen_partition = input.open_complete_partitions[0]; else - output.chosen_partition = LogicalPartition::NO_PART; + output.chosen_partition = Legion::LogicalPartition::NO_PART; } -void BaseMapper::map_partition(const MapperContext ctx, - const Partition& partition, +void BaseMapper::map_partition(const Legion::Mapping::MapperContext ctx, + const Legion::Partition& partition, const MapPartitionInput& input, MapPartitionOutput& output) { @@ -1327,14 +1337,15 @@ void BaseMapper::map_partition(const MapperContext ctx, std::vector mappings; mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); - std::map*> output_map; + std::map*> + output_map; for (auto* req : mappings.front().requirements()) output_map[req] = &output.chosen_instances; map_legate_stores(ctx, partition, mappings, target_proc, output_map); } -void BaseMapper::select_partition_sources(const MapperContext ctx, - const Partition& partition, +void BaseMapper::select_partition_sources(const Legion::Mapping::MapperContext ctx, + const Legion::Partition& partition, const SelectPartitionSrcInput& input, SelectPartitionSrcOutput& output) { @@ -1342,24 +1353,24 @@ void BaseMapper::select_partition_sources(const MapperContext ctx, ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } -void BaseMapper::report_profiling(const MapperContext ctx, - const Partition& partition, +void BaseMapper::report_profiling(const Legion::Mapping::MapperContext ctx, + const Legion::Partition& partition, const PartitionProfilingInfo& input) { // No profiling yet LEGATE_ABORT; } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const Partition& partition, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Partition& partition, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { output.chosen_functor = find_sharding_functor_by_projection_functor(0); } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const Fill& fill, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Fill& fill, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { @@ -1368,26 +1379,26 @@ void BaseMapper::select_sharding_functor(const MapperContext ctx, : find_sharding_functor_by_projection_functor(0); } -void BaseMapper::configure_context(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::configure_context(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, ContextConfigOutput& output) { // Use the defaults currently } -void BaseMapper::select_tunable_value(const MapperContext ctx, - const LegionTask& task, +void BaseMapper::select_tunable_value(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectTunableInput& input, SelectTunableOutput& output) { - auto value = tunable_value(input.tunable_id); + auto value = legate_mapper_->tunable_value(input.tunable_id); output.size = value.size(); output.value = malloc(output.size); memcpy(output.value, value.ptr(), output.size); } -void BaseMapper::select_sharding_functor(const MapperContext ctx, - const MustEpoch& epoch, +void BaseMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::MustEpoch& epoch, const SelectShardingFunctorInput& input, MustEpochShardingFunctorOutput& output) { @@ -1395,15 +1406,15 @@ void BaseMapper::select_sharding_functor(const MapperContext ctx, LEGATE_ABORT; } -void BaseMapper::memoize_operation(const MapperContext ctx, - const Mappable& mappable, +void BaseMapper::memoize_operation(const Legion::Mapping::MapperContext ctx, + const Legion::Mappable& mappable, const MemoizeInput& input, MemoizeOutput& output) { LEGATE_ABORT; } -void BaseMapper::map_must_epoch(const MapperContext ctx, +void BaseMapper::map_must_epoch(const Legion::Mapping::MapperContext ctx, const MapMustEpochInput& input, MapMustEpochOutput& output) { @@ -1411,7 +1422,7 @@ void BaseMapper::map_must_epoch(const MapperContext ctx, LEGATE_ABORT; } -void BaseMapper::map_dataflow_graph(const MapperContext ctx, +void BaseMapper::map_dataflow_graph(const Legion::Mapping::MapperContext ctx, const MapDataflowGraphInput& input, MapDataflowGraphOutput& output) { @@ -1419,7 +1430,7 @@ void BaseMapper::map_dataflow_graph(const MapperContext ctx, LEGATE_ABORT; } -void BaseMapper::select_tasks_to_map(const MapperContext ctx, +void BaseMapper::select_tasks_to_map(const Legion::Mapping::MapperContext ctx, const SelectMappingInput& input, SelectMappingOutput& output) { @@ -1427,14 +1438,14 @@ void BaseMapper::select_tasks_to_map(const MapperContext ctx, for (auto task : input.ready_tasks) output.map_tasks.insert(task); } -void BaseMapper::select_steal_targets(const MapperContext ctx, +void BaseMapper::select_steal_targets(const Legion::Mapping::MapperContext ctx, const SelectStealingInput& input, SelectStealingOutput& output) { // Nothing to do, no stealing in the leagte mapper currently } -void BaseMapper::permit_steal_request(const MapperContext ctx, +void BaseMapper::permit_steal_request(const Legion::Mapping::MapperContext ctx, const StealRequestInput& input, StealRequestOutput& output) { @@ -1442,13 +1453,15 @@ void BaseMapper::permit_steal_request(const MapperContext ctx, LEGATE_ABORT; } -void BaseMapper::handle_message(const MapperContext ctx, const MapperMessage& message) +void BaseMapper::handle_message(const Legion::Mapping::MapperContext ctx, + const MapperMessage& message) { // We shouldn't be receiving any messages currently LEGATE_ABORT; } -void BaseMapper::handle_task_result(const MapperContext ctx, const MapperTaskResult& result) +void BaseMapper::handle_task_result(const Legion::Mapping::MapperContext ctx, + const MapperTaskResult& result) { // Nothing to do since we should never get one of these LEGATE_ABORT; diff --git a/src/core/mapping/base_mapper.h b/src/core/mapping/base_mapper.h index 86e558e0b..32d6ca2cb 100644 --- a/src/core/mapping/base_mapper.h +++ b/src/core/mapping/base_mapper.h @@ -38,10 +38,13 @@ enum class Strictness : bool { hint = false, }; -class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { +class BaseMapper : public Legion::Mapping::Mapper, public MachineQueryInterface { public: - BaseMapper(Legion::Runtime* rt, Legion::Machine machine, const LibraryContext& context); - virtual ~BaseMapper(void); + BaseMapper(std::unique_ptr legate_mapper, + Legion::Runtime* rt, + Legion::Machine machine, + const LibraryContext& context); + virtual ~BaseMapper(); private: BaseMapper(const BaseMapper& rhs) = delete; @@ -49,15 +52,22 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { protected: // Start-up methods - static Legion::AddressSpaceID get_local_node(void); + static Legion::AddressSpaceID get_local_node(); static size_t get_total_nodes(Legion::Machine m); std::string create_name(Legion::AddressSpace node) const; std::string create_logger_name() const; public: - virtual const char* get_mapper_name(void) const override; - virtual Legion::Mapping::Mapper::MapperSyncModel get_mapper_sync_model(void) const override; - virtual bool request_valid_instances(void) const override { return false; } + // MachineQueryInterface + virtual const std::vector& cpus() const override { return local_cpus; } + virtual const std::vector& gpus() const override { return local_gpus; } + virtual const std::vector& omps() const override { return local_omps; } + virtual uint32_t total_nodes() const override { return total_nodes_; } + + public: + virtual const char* get_mapper_name() const override; + virtual Legion::Mapping::Mapper::MapperSyncModel get_mapper_sync_model() const override; + virtual bool request_valid_instances() const override { return false; } public: // Task mapping calls virtual void select_task_options(const Legion::Mapping::MapperContext ctx, @@ -257,13 +267,13 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { const MapperTaskResult& result) override; protected: - Legion::Memory get_target_memory(Legion::Processor proc, StoreTarget target); + Memory get_target_memory(Processor proc, StoreTarget target); using OutputMap = std::map*>; void map_legate_stores(const Legion::Mapping::MapperContext ctx, const Legion::Mappable& mappable, std::vector& mappings, - Legion::Processor target_proc, + Processor target_proc, OutputMap& output_map); void tighten_write_policies(const Legion::Mappable& mappable, std::vector& mappings); @@ -271,12 +281,12 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { const Legion::Mappable& mappable, const StoreMapping& mapping, const std::set& reqs, - Legion::Processor target_proc, + Processor target_proc, Legion::Mapping::PhysicalInstance& result, bool can_fail); void report_failed_mapping(const Legion::Mappable& mappable, unsigned index, - Legion::Memory target_memory, + Memory target_memory, Legion::ReductionOpID redop); void legate_select_sources(const Legion::Mapping::MapperContext ctx, const Legion::Mapping::PhysicalInstance& target, @@ -287,15 +297,14 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { protected: bool has_variant(const Legion::Mapping::MapperContext ctx, const Legion::Task& task, - Legion::Processor::Kind kind); + Processor::Kind kind); std::optional find_variant(const Legion::Mapping::MapperContext ctx, const Legion::Task& task, - Legion::Processor::Kind kind); + Processor::Kind kind); private: void generate_prime_factors(); - void generate_prime_factor(const std::vector& processors, - Legion::Processor::Kind kind); + void generate_prime_factor(const std::vector& processors, Processor::Kind kind); protected: template @@ -310,12 +319,12 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { return functor(local_cpus); } template - decltype(auto) dispatch(Legion::Processor::Kind kind, Functor functor) + decltype(auto) dispatch(Processor::Kind kind, Functor functor) { switch (kind) { - case Legion::Processor::LOC_PROC: return functor(local_cpus); - case Legion::Processor::TOC_PROC: return functor(local_gpus); - case Legion::Processor::OMP_PROC: return functor(local_omps); + case Processor::LOC_PROC: return functor(local_cpus); + case Processor::TOC_PROC: return functor(local_gpus); + case Processor::OMP_PROC: return functor(local_omps); default: LEGATE_ABORT; } assert(false); @@ -323,7 +332,7 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { } protected: - const std::vector get_processor_grid(Legion::Processor::Kind kind, int32_t ndim); + const std::vector get_processor_grid(Processor::Kind kind, int32_t ndim); void slice_auto_task(const Legion::Mapping::MapperContext ctx, const Legion::Task& task, const SliceTaskInput& input, @@ -344,28 +353,30 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { { return (left.second < right.second); } - // NumPyOpCode decode_task_id(Legion::TaskID tid); + + private: + std::unique_ptr legate_mapper_; public: Legion::Runtime* const legion_runtime; const Legion::Machine machine; const LibraryContext context; const Legion::AddressSpace local_node; - const size_t total_nodes; const std::string mapper_name; Legion::Logger logger; protected: - std::vector local_cpus; - std::vector local_gpus; - std::vector local_omps; // OpenMP processors + const size_t total_nodes_; + std::vector local_cpus; + std::vector local_gpus; + std::vector local_omps; // OpenMP processors protected: - Legion::Memory local_system_memory, local_zerocopy_memory; - std::map local_frame_buffers; - std::map local_numa_domains; + Memory local_system_memory, local_zerocopy_memory; + std::map local_frame_buffers; + std::map local_numa_domains; protected: - using VariantCacheKey = std::pair; + using VariantCacheKey = std::pair; std::map> variants; protected: @@ -374,8 +385,8 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { protected: // Used for n-D cyclic distribution - std::map> all_factors; - std::map, std::vector> proc_grids; + std::map> all_factors; + std::map, std::vector> proc_grids; protected: // These are used for computing sharding functions diff --git a/src/core/mapping/core_mapper.cc b/src/core/mapping/core_mapper.cc index bb9389108..0d09e3c7d 100644 --- a/src/core/mapping/core_mapper.cc +++ b/src/core/mapping/core_mapper.cc @@ -24,12 +24,10 @@ #endif #include "core/task/task.h" #include "core/utilities/linearize.h" +#include "core/utilities/typedefs.h" namespace legate { -using namespace Legion; -using namespace Legion::Mapping; - uint32_t extract_env(const char* env_name, const uint32_t default_value, const uint32_t test_value) { const char* env_value = getenv(env_name); @@ -48,61 +46,65 @@ uint32_t extract_env(const char* env_name, const uint32_t default_value, const u // should be overriding this mapper so we burry it in here class CoreMapper : public Legion::Mapping::NullMapper { public: - CoreMapper(MapperRuntime* runtime, Machine machine, const LibraryContext& context); - virtual ~CoreMapper(void); + CoreMapper(Legion::Mapping::MapperRuntime* runtime, + Legion::Machine machine, + const LibraryContext& context); + virtual ~CoreMapper(); public: // Start-up methods - static AddressSpaceID get_local_node(void); - static size_t get_total_nodes(Machine m); - static const char* create_name(AddressSpace node); + static Legion::AddressSpaceID get_local_node(); + static size_t get_total_nodes(Legion::Machine m); + static const char* create_name(Legion::AddressSpace node); public: - virtual const char* get_mapper_name(void) const; - virtual MapperSyncModel get_mapper_sync_model(void) const; - virtual bool request_valid_instances(void) const { return false; } + virtual const char* get_mapper_name() const override; + virtual Legion::Mapping::Mapper::MapperSyncModel get_mapper_sync_model() const override; + virtual bool request_valid_instances() const { return false; } public: // Task mapping calls - virtual void select_task_options(const MapperContext ctx, const Task& task, TaskOptions& output); - virtual void slice_task(const MapperContext ctx, - const Task& task, + virtual void select_task_options(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, + TaskOptions& output); + virtual void slice_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SliceTaskInput& input, SliceTaskOutput& output); - virtual void map_task(const MapperContext ctx, - const Task& task, + virtual void map_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const MapTaskInput& input, MapTaskOutput& output); - virtual void select_sharding_functor(const MapperContext ctx, - const Task& task, + virtual void select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output); - virtual void select_steal_targets(const MapperContext ctx, + virtual void select_steal_targets(const Legion::Mapping::MapperContext ctx, const SelectStealingInput& input, SelectStealingOutput& output); - virtual void select_tasks_to_map(const MapperContext ctx, + virtual void select_tasks_to_map(const Legion::Mapping::MapperContext ctx, const SelectMappingInput& input, SelectMappingOutput& output); public: - virtual void configure_context(const MapperContext ctx, - const Task& task, + virtual void configure_context(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, ContextConfigOutput& output); - void map_future_map_reduction(const MapperContext ctx, + void map_future_map_reduction(const Legion::Mapping::MapperContext ctx, const FutureMapReductionInput& input, FutureMapReductionOutput& output); - virtual void select_tunable_value(const MapperContext ctx, - const Task& task, + virtual void select_tunable_value(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectTunableInput& input, SelectTunableOutput& output); protected: template - decltype(auto) dispatch(Legion::Processor::Kind kind, Functor functor) + decltype(auto) dispatch(Processor::Kind kind, Functor functor) { switch (kind) { - case Legion::Processor::LOC_PROC: return functor(local_cpus); - case Legion::Processor::TOC_PROC: return functor(local_gpus); - case Legion::Processor::OMP_PROC: return functor(local_omps); + case Processor::LOC_PROC: return functor(local_cpus); + case Processor::TOC_PROC: return functor(local_gpus); + case Processor::OMP_PROC: return functor(local_omps); default: LEGATE_ABORT; } assert(false); @@ -110,7 +112,7 @@ class CoreMapper : public Legion::Mapping::NullMapper { } public: - const AddressSpace local_node; + const Legion::AddressSpace local_node; const size_t total_nodes; const char* const mapper_name; LibraryContext context; @@ -138,7 +140,9 @@ class CoreMapper : public Legion::Mapping::NullMapper { std::map local_numa_domains; }; -CoreMapper::CoreMapper(MapperRuntime* rt, Machine m, const LibraryContext& c) +CoreMapper::CoreMapper(Legion::Mapping::MapperRuntime* rt, + Legion::Machine m, + const LibraryContext& c) : NullMapper(rt, m), local_node(get_local_node()), total_nodes(get_total_nodes(m)), @@ -164,9 +168,10 @@ CoreMapper::CoreMapper(MapperRuntime* rt, Machine m, const LibraryContext& c) has_socket_mem(false) { // Query to find all our local processors - Machine::ProcessorQuery local_procs(machine); + Legion::Machine::ProcessorQuery local_procs(machine); local_procs.local_address_space(); - for (Machine::ProcessorQuery::iterator it = local_procs.begin(); it != local_procs.end(); it++) { + for (Legion::Machine::ProcessorQuery::iterator it = local_procs.begin(); it != local_procs.end(); + it++) { switch (it->kind()) { case Processor::LOC_PROC: { local_cpus.push_back(*it); @@ -184,20 +189,20 @@ CoreMapper::CoreMapper(MapperRuntime* rt, Machine m, const LibraryContext& c) } } // Now do queries to find all our local memories - Machine::MemoryQuery local_sysmem(machine); + Legion::Machine::MemoryQuery local_sysmem(machine); local_sysmem.local_address_space(); local_sysmem.only_kind(Memory::SYSTEM_MEM); assert(local_sysmem.count() > 0); local_system_memory = local_sysmem.first(); if (!local_gpus.empty()) { - Machine::MemoryQuery local_zcmem(machine); + Legion::Machine::MemoryQuery local_zcmem(machine); local_zcmem.local_address_space(); local_zcmem.only_kind(Memory::Z_COPY_MEM); assert(local_zcmem.count() > 0); local_zerocopy_memory = local_zcmem.first(); } for (auto local_gpu : local_gpus) { - Machine::MemoryQuery local_framebuffer(machine); + Legion::Machine::MemoryQuery local_framebuffer(machine); local_framebuffer.local_address_space(); local_framebuffer.only_kind(Memory::GPU_FB_MEM); local_framebuffer.best_affinity_to(local_gpu); @@ -205,7 +210,7 @@ CoreMapper::CoreMapper(MapperRuntime* rt, Machine m, const LibraryContext& c) local_frame_buffers[local_gpu] = local_framebuffer.first(); } for (auto local_omp : local_omps) { - Machine::MemoryQuery local_numa(machine); + Legion::Machine::MemoryQuery local_numa(machine); local_numa.local_address_space(); local_numa.only_kind(Memory::SOCKET_MEM); local_numa.best_affinity_to(local_omp); @@ -218,39 +223,40 @@ CoreMapper::CoreMapper(MapperRuntime* rt, Machine m, const LibraryContext& c) } } -CoreMapper::~CoreMapper(void) { free(const_cast(mapper_name)); } +CoreMapper::~CoreMapper() { free(const_cast(mapper_name)); } -/*static*/ AddressSpace CoreMapper::get_local_node(void) +/*static*/ Legion::AddressSpace CoreMapper::get_local_node() { Processor p = Processor::get_executing_processor(); return p.address_space(); } -/*static*/ size_t CoreMapper::get_total_nodes(Machine m) +/*static*/ size_t CoreMapper::get_total_nodes(Legion::Machine m) { - Machine::ProcessorQuery query(m); + Legion::Machine::ProcessorQuery query(m); query.only_kind(Processor::LOC_PROC); - std::set spaces; - for (Machine::ProcessorQuery::iterator it = query.begin(); it != query.end(); it++) - spaces.insert(it->address_space()); + std::set spaces; + for (auto it = query.begin(); it != query.end(); it++) spaces.insert(it->address_space()); return spaces.size(); } -/*static*/ const char* CoreMapper::create_name(AddressSpace node) +/*static*/ const char* CoreMapper::create_name(Legion::AddressSpace node) { char buffer[128]; snprintf(buffer, 127, "Legate Mapper on Node %d", node); return strdup(buffer); } -const char* CoreMapper::get_mapper_name(void) const { return mapper_name; } +const char* CoreMapper::get_mapper_name() const { return mapper_name; } -Mapper::MapperSyncModel CoreMapper::get_mapper_sync_model(void) const +Legion::Mapping::Mapper::MapperSyncModel CoreMapper::get_mapper_sync_model() const { return SERIALIZED_REENTRANT_MAPPER_MODEL; } -void CoreMapper::select_task_options(const MapperContext ctx, const Task& task, TaskOptions& output) +void CoreMapper::select_task_options(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, + TaskOptions& output) { assert(context.valid_task_id(task.task_id)); if (task.tag == LEGATE_CPU_VARIANT) { @@ -266,8 +272,8 @@ void CoreMapper::select_task_options(const MapperContext ctx, const Task& task, } } -void CoreMapper::slice_task(const MapperContext ctx, - const Task& task, +void CoreMapper::slice_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SliceTaskInput& input, SliceTaskOutput& output) { @@ -291,8 +297,8 @@ void CoreMapper::slice_task(const MapperContext ctx, dispatch(task.target_proc.kind(), round_robin); } -void CoreMapper::map_task(const MapperContext ctx, - const Task& task, +void CoreMapper::map_task(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const MapTaskInput& input, MapTaskOutput& output) { @@ -302,8 +308,8 @@ void CoreMapper::map_task(const MapperContext ctx, output.chosen_variant = task.tag; } -void CoreMapper::select_sharding_functor(const MapperContext ctx, - const Task& task, +void CoreMapper::select_sharding_functor(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { @@ -314,29 +320,29 @@ void CoreMapper::select_sharding_functor(const MapperContext ctx, output.chosen_functor = context.get_sharding_id(LEGATE_CORE_TOPLEVEL_TASK_SHARD_ID); } -void CoreMapper::select_steal_targets(const MapperContext ctx, +void CoreMapper::select_steal_targets(const Legion::Mapping::MapperContext ctx, const SelectStealingInput& input, SelectStealingOutput& output) { // Do nothing } -void CoreMapper::select_tasks_to_map(const MapperContext ctx, +void CoreMapper::select_tasks_to_map(const Legion::Mapping::MapperContext ctx, const SelectMappingInput& input, SelectMappingOutput& output) { output.map_tasks.insert(input.ready_tasks.begin(), input.ready_tasks.end()); } -void CoreMapper::configure_context(const MapperContext ctx, - const Task& task, +void CoreMapper::configure_context(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, ContextConfigOutput& output) { // Use the defaults currently } template -void pack_tunable(const T value, Mapper::SelectTunableOutput& output) +void pack_tunable(const T value, Legion::Mapping::Mapper::SelectTunableOutput& output) { T* result = static_cast(malloc(sizeof(value))); *result = value; @@ -344,7 +350,7 @@ void pack_tunable(const T value, Mapper::SelectTunableOutput& output) output.size = sizeof(value); } -void CoreMapper::map_future_map_reduction(const MapperContext ctx, +void CoreMapper::map_future_map_reduction(const Legion::Mapping::MapperContext ctx, const FutureMapReductionInput& input, FutureMapReductionOutput& output) { @@ -368,8 +374,8 @@ void CoreMapper::map_future_map_reduction(const MapperContext ctx, for (auto& pair : local_numa_domains) output.destination_memories.push_back(pair.second); } -void CoreMapper::select_tunable_value(const MapperContext ctx, - const Task& task, +void CoreMapper::select_tunable_value(const Legion::Mapping::MapperContext ctx, + const Legion::Task& task, const SelectTunableInput& input, SelectTunableOutput& output) { @@ -468,7 +474,9 @@ void CoreMapper::select_tunable_value(const MapperContext ctx, LEGATE_ABORT; } -void register_legate_core_mapper(Machine machine, Runtime* runtime, const LibraryContext& context) +void register_legate_core_mapper(Legion::Machine machine, + Legion::Runtime* runtime, + const LibraryContext& context) { // Replace all the default mappers with our custom mapper for the Legate // top-level task and init task diff --git a/src/core/mapping/instance_manager.cc b/src/core/mapping/instance_manager.cc index f732b0b79..c4416cd83 100644 --- a/src/core/mapping/instance_manager.cc +++ b/src/core/mapping/instance_manager.cc @@ -20,9 +20,6 @@ namespace legate { namespace mapping { -using namespace Legion; -using namespace Legion::Mapping; - using RegionGroupP = std::shared_ptr; static Legion::Logger log_instmgr("instmgr"); @@ -37,9 +34,9 @@ RegionGroup::RegionGroup(std::set&& rs, const Domain bound) { } -std::vector RegionGroup::get_regions() const +std::vector RegionGroup::get_regions() const { - std::vector result; + std::vector result; result.insert(result.end(), regions.begin(), regions.end()); return std::move(result); } @@ -106,7 +103,7 @@ static inline bool too_big(size_t union_volume, struct construct_overlapping_region_group_fn { template RegionGroupP operator()(const InstanceSet::Region& region, - const InstanceSet::Domain& domain, + const Domain& domain, const std::map& instances) { auto bound = domain.bounds(); @@ -159,7 +156,7 @@ struct construct_overlapping_region_group_fn { bound_vol = union_vol; } - return std::make_shared(std::move(regions), InstanceSet::Domain(bound)); + return std::make_shared(std::move(regions), Domain(bound)); } }; @@ -241,7 +238,7 @@ std::set InstanceSet::record_instance(RegionGroupP group, return std::move(replaced); } -bool InstanceSet::erase(PhysicalInstance inst) +bool InstanceSet::erase(Instance inst) { std::set filtered_groups; #ifdef DEBUG_LEGATE @@ -333,7 +330,7 @@ void ReductionInstanceSet::record_instance(ReductionOpID& redop, } } -bool ReductionInstanceSet::erase(PhysicalInstance inst) +bool ReductionInstanceSet::erase(Instance inst) { for (auto it = instances_.begin(); it != instances_.end(); /*nothing*/) { if (it->second.instance == inst) { @@ -390,7 +387,7 @@ std::set InstanceManager::record_instance( return instance_sets_[key].record_instance(group, instance, policy); } -void InstanceManager::erase(PhysicalInstance inst) +void InstanceManager::erase(Instance inst) { const auto mem = inst.get_location(); const auto tid = inst.get_tree_id(); @@ -408,9 +405,9 @@ void InstanceManager::erase(PhysicalInstance inst) } } -std::map InstanceManager::aggregate_instance_sizes() const +std::map InstanceManager::aggregate_instance_sizes() const { - std::map result; + std::map result; for (auto& pair : instance_sets_) { auto& memory = pair.first.memory; if (result.find(memory) == result.end()) result[memory] = 0; @@ -459,7 +456,7 @@ void ReductionInstanceManager::record_instance(ReductionOpID& redop, } } -void ReductionInstanceManager::erase(PhysicalInstance inst) +void ReductionInstanceManager::erase(Instance inst) { const auto mem = inst.get_location(); const auto tid = inst.get_tree_id(); diff --git a/src/core/mapping/instance_manager.h b/src/core/mapping/instance_manager.h index c42df3119..35a49befb 100644 --- a/src/core/mapping/instance_manager.h +++ b/src/core/mapping/instance_manager.h @@ -30,7 +30,6 @@ namespace mapping { struct RegionGroup { public: using Region = Legion::LogicalRegion; - using Domain = Legion::Domain; public: RegionGroup(const std::set& regions, const Domain bounding_box); @@ -56,7 +55,6 @@ struct InstanceSet { public: using Region = Legion::LogicalRegion; using Instance = Legion::Mapping::PhysicalInstance; - using Domain = Legion::Domain; using RegionGroupP = std::shared_ptr; public: @@ -99,7 +97,6 @@ class ReductionInstanceSet { public: using Region = Legion::LogicalRegion; using Instance = Legion::Mapping::PhysicalInstance; - using Domain = Legion::Domain; using ReductionOpID = Legion::ReductionOpID; public: @@ -141,9 +138,7 @@ class BaseInstanceManager { using Region = Legion::LogicalRegion; using RegionTreeID = Legion::RegionTreeID; using Instance = Legion::Mapping::PhysicalInstance; - using Domain = Legion::Domain; using FieldID = Legion::FieldID; - using Memory = Legion::Memory; public: struct FieldMemInfo { @@ -206,7 +201,7 @@ class InstanceManager : public BaseInstanceManager { static InstanceManager* get_instance_manager(); public: - std::map aggregate_instance_sizes() const; + std::map aggregate_instance_sizes() const; private: std::map instance_sets_{}; diff --git a/src/core/mapping/mapping.cc b/src/core/mapping/mapping.cc index 5d1aa971c..3e092708d 100644 --- a/src/core/mapping/mapping.cc +++ b/src/core/mapping/mapping.cc @@ -20,8 +20,6 @@ #include "core/mapping/mapping.h" -using namespace Legion; - namespace legate { namespace mapping { @@ -44,7 +42,7 @@ bool DimOrdering::operator==(const DimOrdering& other) const } void DimOrdering::populate_dimension_ordering(const Store& store, - std::vector& ordering) const + std::vector& ordering) const { // TODO: We need to implement the relative dimension ordering assert(!relative); @@ -52,17 +50,17 @@ void DimOrdering::populate_dimension_ordering(const Store& store, case Kind::C: { auto dim = store.region_field().dim(); for (int32_t idx = dim - 1; idx >= 0; --idx) - ordering.push_back(static_cast(DIM_X + idx)); + ordering.push_back(static_cast(DIM_X + idx)); break; } case Kind::FORTRAN: { auto dim = store.region_field().dim(); for (int32_t idx = 0; idx < dim; ++idx) - ordering.push_back(static_cast(DIM_X + idx)); + ordering.push_back(static_cast(DIM_X + idx)); break; } case Kind::CUSTOM: { - for (auto idx : dims) ordering.push_back(static_cast(DIM_X + idx)); + for (auto idx : dims) ordering.push_back(static_cast(DIM_X + idx)); break; } } @@ -92,15 +90,16 @@ bool InstanceMappingPolicy::operator!=(const InstanceMappingPolicy& other) const void InstanceMappingPolicy::populate_layout_constraints( const Store& store, Legion::LayoutConstraintSet& layout_constraints) const { - std::vector dimension_ordering{}; + std::vector dimension_ordering{}; if (layout == InstLayout::AOS) dimension_ordering.push_back(DIM_F); ordering.populate_dimension_ordering(store, dimension_ordering); if (layout == InstLayout::SOA) dimension_ordering.push_back(DIM_F); - layout_constraints.add_constraint(OrderingConstraint(dimension_ordering, false /*contiguous*/)); + layout_constraints.add_constraint( + Legion::OrderingConstraint(dimension_ordering, false /*contiguous*/)); - layout_constraints.add_constraint(MemoryConstraint(get_memory_kind(target))); + layout_constraints.add_constraint(Legion::MemoryConstraint(get_memory_kind(target))); } /*static*/ InstanceMappingPolicy InstanceMappingPolicy::default_policy(StoreTarget target, @@ -160,9 +159,9 @@ std::set StoreMapping::requirement_indices() const return std::move(indices); } -std::set StoreMapping::requirements() const +std::set StoreMapping::requirements() const { - std::set reqs; + std::set reqs; for (auto& store : stores) { if (store.is_future()) continue; auto* req = store.region_field().get_requirement(); @@ -177,9 +176,9 @@ void StoreMapping::populate_layout_constraints( { policy.populate_layout_constraints(stores.front(), layout_constraints); - std::vector fields{}; + std::vector fields{}; if (stores.size() > 1) { - std::set field_set{}; + std::set field_set{}; for (auto& store : stores) { auto field_id = store.region_field().field_id(); if (field_set.find(field_id) == field_set.end()) { @@ -189,7 +188,7 @@ void StoreMapping::populate_layout_constraints( } } else fields.push_back(stores.front().region_field().field_id()); - layout_constraints.add_constraint(FieldConstraint(fields, true /*contiguous*/)); + layout_constraints.add_constraint(Legion::FieldConstraint(fields, true /*contiguous*/)); } /*static*/ StoreMapping StoreMapping::default_mapping(const Store& store, diff --git a/src/core/mapping/mapping.h b/src/core/mapping/mapping.h index 2d56d1a67..bb1b92b3c 100644 --- a/src/core/mapping/mapping.h +++ b/src/core/mapping/mapping.h @@ -17,6 +17,7 @@ #pragma once #include "core/mapping/operation.h" +#include "core/utilities/typedefs.h" namespace legate { namespace mapping { @@ -108,7 +109,8 @@ struct InstanceMappingPolicy { bool operator==(const InstanceMappingPolicy&) const; bool operator!=(const InstanceMappingPolicy&) const; - public: + private: + friend class StoreMapping; void populate_layout_constraints(const Store& store, Legion::LayoutConstraintSet& layout_constraints) const; @@ -142,15 +144,23 @@ struct StoreMapping { std::set requirement_indices() const; std::set requirements() const; - public: + private: + friend class BaseMapper; void populate_layout_constraints(Legion::LayoutConstraintSet& layout_constraints) const; public: static StoreMapping default_mapping(const Store& store, StoreTarget target, bool exact = false); }; +struct MachineQueryInterface { + virtual const std::vector& cpus() const = 0; + virtual const std::vector& gpus() const = 0; + virtual const std::vector& omps() const = 0; + virtual uint32_t total_nodes() const = 0; +}; + struct LegateMapper { - virtual bool is_pure() const = 0; + virtual void set_machine(const MachineQueryInterface* machine) = 0; virtual TaskTarget task_target(const Task& task, const std::vector& options) = 0; virtual std::vector store_mappings(const Task& task, const std::vector& options) = 0; diff --git a/src/core/mapping/operation.cc b/src/core/mapping/operation.cc index 03f34d5b2..fb6826ac6 100644 --- a/src/core/mapping/operation.cc +++ b/src/core/mapping/operation.cc @@ -20,13 +20,10 @@ namespace legate { namespace mapping { -using LegionTask = Legion::Task; -using LegionCopy = Legion::Copy; - -using namespace Legion; -using namespace Legion::Mapping; - -RegionField::RegionField(const RegionRequirement* req, int32_t dim, uint32_t idx, FieldID fid) +RegionField::RegionField(const Legion::RegionRequirement* req, + int32_t dim, + uint32_t idx, + Legion::FieldID fid) : req_(req), dim_(dim), idx_(idx), fid_(fid) { } @@ -38,12 +35,13 @@ bool RegionField::can_colocate_with(const RegionField& other) const return my_req->region.get_tree_id() == other_req->region.get_tree_id(); } -Domain RegionField::domain(MapperRuntime* runtime, const MapperContext context) const +Domain RegionField::domain(Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context) const { return runtime->get_index_space_domain(context, get_index_space()); } -IndexSpace RegionField::get_index_space() const { return req_->region.get_index_space(); } +Legion::IndexSpace RegionField::get_index_space() const { return req_->region.get_index_space(); } FutureWrapper::FutureWrapper(uint32_t idx, const Domain& domain) : idx_(idx), domain_(domain) {} @@ -139,10 +137,10 @@ Domain Store::domain() const return result; } -Task::Task(const LegionTask* task, +Task::Task(const Legion::Task* task, const LibraryContext& library, - MapperRuntime* runtime, - const MapperContext context) + Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context) : task_(task), library_(library) { TaskDeserializer dez(task, runtime, context); @@ -154,7 +152,9 @@ Task::Task(const LegionTask* task, int64_t Task::task_id() const { return library_.get_local_task_id(task_->task_id); } -Copy::Copy(const LegionCopy* copy, MapperRuntime* runtime, const MapperContext context) +Copy::Copy(const Legion::Copy* copy, + Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context) : copy_(copy) { CopyDeserializer dez(copy->mapper_data, diff --git a/src/core/mapping/operation.h b/src/core/mapping/operation.h index 0cc5dc267..fb84fe27c 100644 --- a/src/core/mapping/operation.h +++ b/src/core/mapping/operation.h @@ -43,12 +43,12 @@ class RegionField { public: template - Legion::Rect shape(Legion::Mapping::MapperRuntime* runtime, - const Legion::Mapping::MapperContext context) const; + Rect shape(Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context) const; public: - Legion::Domain domain(Legion::Mapping::MapperRuntime* runtime, - const Legion::Mapping::MapperContext context) const; + Domain domain(Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context) const; public: bool operator==(const RegionField& other) const; @@ -76,7 +76,7 @@ class RegionField { class FutureWrapper { public: FutureWrapper() {} - FutureWrapper(uint32_t idx, const Legion::Domain& domain); + FutureWrapper(uint32_t idx, const Domain& domain); public: FutureWrapper(const FutureWrapper& other) = default; @@ -88,12 +88,12 @@ class FutureWrapper { public: template - Legion::Rect shape() const; - Legion::Domain domain() const; + Rect shape() const; + Domain domain() const; private: uint32_t idx_{-1U}; - Legion::Domain domain_{}; + Domain domain_{}; }; class Store { @@ -131,7 +131,7 @@ class Store { public: bool is_reduction() const { return redop_id_ > 0; } - Legion::ReductionOpID redop() const { return redop_id_; } + int32_t redop() const { return redop_id_; } public: bool can_colocate_with(const Store& other) const; @@ -145,10 +145,10 @@ class Store { public: template - Legion::Rect shape() const; + Rect shape() const; public: - Legion::Domain domain() const; + Domain domain() const; private: bool is_future_{false}; @@ -186,7 +186,7 @@ class Task { const std::vector& scalars() const { return scalars_; } public: - Legion::DomainPoint point() const { return task_->index_point; } + DomainPoint point() const { return task_->index_point; } private: const LibraryContext& library_; @@ -210,7 +210,7 @@ class Copy { const std::vector& output_indirections() const { return output_indirections_; } public: - Legion::DomainPoint point() const { return copy_->index_point; } + DomainPoint point() const { return copy_->index_point; } private: const Legion::Copy* copy_; diff --git a/src/core/runtime/context.cc b/src/core/runtime/context.cc index c482ae3aa..f696c101b 100644 --- a/src/core/runtime/context.cc +++ b/src/core/runtime/context.cc @@ -32,24 +32,22 @@ namespace legate { -LibraryContext::LibraryContext(Legion::Runtime* runtime, - const std::string& library_name, - const ResourceConfig& config) - : runtime_(runtime), library_name_(library_name) +LibraryContext::LibraryContext(const std::string& library_name, const ResourceConfig& config) + : runtime_(Legion::Runtime::get_runtime()), library_name_(library_name) { task_scope_ = ResourceScope( - runtime->generate_library_task_ids(library_name.c_str(), config.max_tasks), config.max_tasks); + runtime_->generate_library_task_ids(library_name.c_str(), config.max_tasks), config.max_tasks); mapper_scope_ = - ResourceScope(runtime->generate_library_mapper_ids(library_name.c_str(), config.max_mappers), + ResourceScope(runtime_->generate_library_mapper_ids(library_name.c_str(), config.max_mappers), config.max_mappers); redop_scope_ = ResourceScope( - runtime->generate_library_reduction_ids(library_name.c_str(), config.max_reduction_ops), + runtime_->generate_library_reduction_ids(library_name.c_str(), config.max_reduction_ops), config.max_reduction_ops); proj_scope_ = ResourceScope( - runtime->generate_library_projection_ids(library_name.c_str(), config.max_projections), + runtime_->generate_library_projection_ids(library_name.c_str(), config.max_projections), config.max_projections); shard_scope_ = ResourceScope( - runtime->generate_library_sharding_ids(library_name.c_str(), config.max_shardings), + runtime_->generate_library_sharding_ids(library_name.c_str(), config.max_shardings), config.max_shardings); } @@ -148,13 +146,15 @@ bool LibraryContext::valid_sharding_id(Legion::ShardingID shard_id) const return shard_scope_.in_scope(shard_id); } -void LibraryContext::register_mapper(mapping::BaseMapper* mapper, int64_t local_mapper_id) const +void LibraryContext::register_mapper(std::unique_ptr mapper, + int64_t local_mapper_id) const { - auto mapper_id = get_mapper_id(local_mapper_id); + auto base_mapper = new legate::mapping::BaseMapper( + std::move(mapper), runtime_, Realm::Machine::get_machine(), *this); + Legion::Mapping::Mapper* legion_mapper = base_mapper; if (Core::log_mapping_decisions) - runtime_->add_mapper(mapper_id, new Legion::Mapping::LoggingWrapper(mapper, &mapper->logger)); - else - runtime_->add_mapper(mapper_id, mapper); + legion_mapper = new Legion::Mapping::LoggingWrapper(base_mapper, &base_mapper->logger); + runtime_->add_mapper(get_mapper_id(local_mapper_id), legion_mapper); } TaskContext::TaskContext(const Legion::Task* task, @@ -203,7 +203,7 @@ TaskContext::TaskContext(const Legion::Task* task, #ifdef LEGATE_USE_CUDA // If the task is running on a GPU and there is at least one scalar store for reduction, // we need to wait for all the host-to-device copies for initialization to finish - if (Legion::Processor::get_executing_processor().kind() == Legion::Processor::Kind::TOC_PROC) + if (Processor::get_executing_processor().kind() == Processor::Kind::TOC_PROC) for (auto& reduction : reductions_) if (reduction.is_future()) { CHECK_CUDA(cudaDeviceSynchronize()); @@ -214,9 +214,9 @@ TaskContext::TaskContext(const Legion::Task* task, bool TaskContext::is_single_task() const { return !task_->is_index_space; } -Legion::DomainPoint TaskContext::get_task_index() const { return task_->index_point; } +DomainPoint TaskContext::get_task_index() const { return task_->index_point; } -Legion::Domain TaskContext::get_launch_domain() const { return task_->index_domain; } +Domain TaskContext::get_launch_domain() const { return task_->index_domain; } void TaskContext::make_all_unbound_stores_empty() { diff --git a/src/core/runtime/context.h b/src/core/runtime/context.h index b387b4979..2a0c7624a 100644 --- a/src/core/runtime/context.h +++ b/src/core/runtime/context.h @@ -16,16 +16,21 @@ #pragma once +#include + #include "legion.h" +// Must be included after legion.h +#include "legate_defines.h" #include "core/comm/communicator.h" #include "core/task/return.h" +#include "core/utilities/typedefs.h" namespace legate { namespace mapping { -class BaseMapper; +class LegateMapper; } // namespace mapping @@ -70,9 +75,7 @@ class ResourceScope { class LibraryContext { public: - LibraryContext(Legion::Runtime* runtime, - const std::string& library_name, - const ResourceConfig& config); + LibraryContext(const std::string& library_name, const ResourceConfig& config); public: LibraryContext(const LibraryContext&) = default; @@ -102,7 +105,10 @@ class LibraryContext { bool valid_sharding_id(Legion::ShardingID shard_id) const; public: - void register_mapper(mapping::BaseMapper* mapper, int64_t local_mapper_id = 0) const; + template + void register_reduction_operator(); + void register_mapper(std::unique_ptr mapper, + int64_t local_mapper_id = 0) const; private: Legion::Runtime* runtime_; @@ -133,8 +139,8 @@ class TaskContext { public: bool is_single_task() const; bool can_raise_exception() const { return can_raise_exception_; } - Legion::DomainPoint get_task_index() const; - Legion::Domain get_launch_domain() const; + DomainPoint get_task_index() const; + Domain get_launch_domain() const; public: void make_all_unbound_stores_empty(); @@ -159,3 +165,5 @@ class TaskContext { }; } // namespace legate + +#include "core/runtime/context.inl" diff --git a/src/core/runtime/context.inl b/src/core/runtime/context.inl new file mode 100644 index 000000000..e847b5bf1 --- /dev/null +++ b/src/core/runtime/context.inl @@ -0,0 +1,76 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "core/runtime/context.h" + +namespace legate { + +#ifndef REALM_COMPILER_IS_NVCC + +#ifdef LEGATE_USE_CUDA +extern Legion::Logger log_legate; +#endif + +template +void LibraryContext::register_reduction_operator() +{ +#ifdef LEGATE_USE_CUDA + log_legate.error("Reduction operators must be registered in a .cu file when CUDA is enabled"); + LEGATE_ABORT; +#endif + Legion::Runtime::register_reduction_op(get_reduction_op_id(REDOP::REDOP_ID)); +} + +#else // ifndef REALM_COMPILER_IS_NVCC + +namespace detail { + +template +class CUDAReductionOpWrapper : public T { + public: + static const bool has_cuda_reductions = true; + + template + __device__ static void apply_cuda(typename T::LHS& lhs, typename T::RHS rhs) + { + T::template apply(lhs, rhs); + } + + template + __device__ static void fold_cuda(typename T::LHS& lhs, typename T::RHS rhs) + { + T::template fold(lhs, rhs); + } +}; + +} // namespace detail + +template +void LibraryContext::register_reduction_operator() +{ + Legion::Runtime::register_reduction_op( + get_reduction_op_id(REDOP::REDOP_ID), + Realm::ReductionOpUntyped::create_reduction_op>(), + nullptr, + nullptr, + false); +} + +#endif // ifndef REALM_COMPILER_IS_NVCC + +} // namespace legate diff --git a/src/core/runtime/projection.cc b/src/core/runtime/projection.cc index 9392cd051..c45794d68 100644 --- a/src/core/runtime/projection.cc +++ b/src/core/runtime/projection.cc @@ -22,36 +22,33 @@ #include "core/runtime/projection.h" #include "core/utilities/dispatch.h" +#include "core/utilities/typedefs.h" #include "legate_defines.h" -using namespace Legion; - namespace legate { -extern Logger log_legate; - // This special functor overrides the default projection implementation because it needs // to know the the target color space for delinearization. Also note that this functor's // project_point passes through input points, as we already know they are always 1D points // and the output will be linearized back to integers. class DelinearizationFunctor : public LegateProjectionFunctor { public: - DelinearizationFunctor(Runtime* runtime); + DelinearizationFunctor(Legion::Runtime* runtime); public: virtual Legion::LogicalRegion project(Legion::LogicalPartition upper_bound, - const Legion::DomainPoint& point, - const Legion::Domain& launch_domain) override; + const DomainPoint& point, + const Domain& launch_domain) override; public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, - const Legion::Domain& launch_domain) const override; + virtual DomainPoint project_point(const DomainPoint& point, + const Domain& launch_domain) const override; }; template class AffineFunctor : public LegateProjectionFunctor { public: - AffineFunctor(Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets); + AffineFunctor(Legion::Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets); public: DomainPoint project_point(const DomainPoint& point, const Domain& launch_domain) const override; @@ -64,26 +61,27 @@ class AffineFunctor : public LegateProjectionFunctor { Point offsets_; }; -LegateProjectionFunctor::LegateProjectionFunctor(Runtime* rt) : ProjectionFunctor(rt) {} +LegateProjectionFunctor::LegateProjectionFunctor(Legion::Runtime* rt) : ProjectionFunctor(rt) {} -LogicalRegion LegateProjectionFunctor::project(LogicalPartition upper_bound, - const DomainPoint& point, - const Domain& launch_domain) +Legion::LogicalRegion LegateProjectionFunctor::project(Legion::LogicalPartition upper_bound, + const DomainPoint& point, + const Domain& launch_domain) { const DomainPoint dp = project_point(point, launch_domain); if (runtime->has_logical_subregion_by_color(upper_bound, dp)) return runtime->get_logical_subregion_by_color(upper_bound, dp); else - return LogicalRegion::NO_REGION; + return Legion::LogicalRegion::NO_REGION; } -DelinearizationFunctor::DelinearizationFunctor(Runtime* runtime) : LegateProjectionFunctor(runtime) +DelinearizationFunctor::DelinearizationFunctor(Legion::Runtime* runtime) + : LegateProjectionFunctor(runtime) { } -LogicalRegion DelinearizationFunctor::project(LogicalPartition upper_bound, - const DomainPoint& point, - const Domain& launch_domain) +Legion::LogicalRegion DelinearizationFunctor::project(Legion::LogicalPartition upper_bound, + const DomainPoint& point, + const Domain& launch_domain) { const auto color_space = runtime->get_index_partition_color_space(upper_bound.get_index_partition()); @@ -108,17 +106,17 @@ LogicalRegion DelinearizationFunctor::project(LogicalPartition upper_bound, if (runtime->has_logical_subregion_by_color(upper_bound, delinearized)) return runtime->get_logical_subregion_by_color(upper_bound, delinearized); else - return LogicalRegion::NO_REGION; + return Legion::LogicalRegion::NO_REGION; } -Legion::DomainPoint DelinearizationFunctor::project_point(const Legion::DomainPoint& point, - const Legion::Domain& launch_domain) const +DomainPoint DelinearizationFunctor::project_point(const DomainPoint& point, + const Domain& launch_domain) const { return point; } template -AffineFunctor::AffineFunctor(Runtime* runtime, +AffineFunctor::AffineFunctor(Legion::Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets) @@ -170,7 +168,7 @@ template } struct IdentityFunctor : public LegateProjectionFunctor { - IdentityFunctor(Runtime* runtime) : LegateProjectionFunctor(runtime) {} + IdentityFunctor(Legion::Runtime* runtime) : LegateProjectionFunctor(runtime) {} DomainPoint project_point(const DomainPoint& point, const Domain&) const override { return point; @@ -178,7 +176,7 @@ struct IdentityFunctor : public LegateProjectionFunctor { }; static LegateProjectionFunctor* identity_functor{nullptr}; -static std::unordered_map functor_table{}; +static std::unordered_map functor_table{}; static std::mutex functor_table_lock{}; struct create_affine_functor_fn { @@ -218,8 +216,11 @@ struct create_affine_functor_fn { } template - void operator()( - Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets, ProjectionID proj_id) + void operator()(Legion::Runtime* runtime, + int32_t* dims, + int32_t* weights, + int32_t* offsets, + Legion::ProjectionID proj_id) { auto functor = new AffineFunctor(runtime, dims, weights, offsets); #ifdef DEBUG_LEGATE @@ -251,7 +252,8 @@ void register_legate_core_projection_functors(Legion::Runtime* runtime, identity_functor = new IdentityFunctor(runtime); } -LegateProjectionFunctor* find_legate_projection_functor(ProjectionID proj_id, bool allow_missing) +LegateProjectionFunctor* find_legate_projection_functor(Legion::ProjectionID proj_id, + bool allow_missing) { if (0 == proj_id) return identity_functor; const std::lock_guard lock(functor_table_lock); @@ -264,7 +266,7 @@ LegateProjectionFunctor* find_legate_projection_functor(ProjectionID proj_id, bo return result; } -struct LinearizingPointTransformFunctor : public PointTransformFunctor { +struct LinearizingPointTransformFunctor : public Legion::PointTransformFunctor { // This is actually an invertible functor, but we will not use this for inversion virtual bool is_invertible(void) const { return false; } @@ -300,7 +302,7 @@ void legate_register_affine_projection_functor(int32_t src_ndim, int32_t* offsets, legion_projection_id_t proj_id) { - auto runtime = Runtime::get_runtime(); + auto runtime = Legion::Runtime::get_runtime(); legate::double_dispatch(src_ndim, tgt_ndim, legate::create_affine_functor_fn{}, diff --git a/src/core/runtime/projection.h b/src/core/runtime/projection.h index cf74d1689..4eb2bb1ad 100644 --- a/src/core/runtime/projection.h +++ b/src/core/runtime/projection.h @@ -19,6 +19,7 @@ #include "legion.h" #include "core/runtime/context.h" +#include "core/utilities/typedefs.h" namespace legate { @@ -30,8 +31,8 @@ class LegateProjectionFunctor : public Legion::ProjectionFunctor { public: using Legion::ProjectionFunctor::project; virtual Legion::LogicalRegion project(Legion::LogicalPartition upper_bound, - const Legion::DomainPoint& point, - const Legion::Domain& launch_domain); + const DomainPoint& point, + const Domain& launch_domain); public: // legate projection functors are almost always functional and don't traverse the region tree @@ -42,8 +43,8 @@ class LegateProjectionFunctor : public Legion::ProjectionFunctor { void set_collective() { is_collective_ = true; } public: - virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, - const Legion::Domain& launch_domain) const = 0; + virtual DomainPoint project_point(const DomainPoint& point, + const Domain& launch_domain) const = 0; private: bool is_collective_ = false; diff --git a/src/core/runtime/runtime.cc b/src/core/runtime/runtime.cc index 27e14a7da..dbc34030b 100644 --- a/src/core/runtime/runtime.cc +++ b/src/core/runtime/runtime.cc @@ -23,12 +23,11 @@ #include "core/task/task.h" #include "core/utilities/deserializer.h" #include "core/utilities/machine.h" +#include "core/utilities/nvtx_help.h" #include "legate.h" namespace legate { -using namespace Legion; - Logger log_legate("legate"); // This is the unique string name for our library which can be used @@ -95,7 +94,7 @@ static void extract_scalar_task( Legion::Runtime* runtime; Legion::Runtime::legion_task_preamble(args, arglen, p, task, regions, legion_context, runtime); - Core::show_progress(task, legion_context, runtime, task->get_task_name()); + Core::show_progress(task, legion_context, runtime); TaskContext context(task, *regions, legion_context, runtime); auto idx = context.scalars()[0].value(); @@ -112,50 +111,73 @@ static void extract_scalar_task( /*static*/ void Core::show_progress(const Legion::Task* task, Legion::Context ctx, - Legion::Runtime* runtime, - const char* task_name) + Legion::Runtime* runtime) { if (!Core::show_progress_requested) return; const auto exec_proc = runtime->get_executing_processor(ctx); - const auto proc_kind_str = (exec_proc.kind() == Legion::Processor::LOC_PROC) ? "CPU" - : (exec_proc.kind() == Legion::Processor::TOC_PROC) ? "GPU" - : "OpenMP"; + const auto proc_kind_str = (exec_proc.kind() == Processor::LOC_PROC) ? "CPU" + : (exec_proc.kind() == Processor::TOC_PROC) ? "GPU" + : "OpenMP"; std::stringstream point_str; const auto& point = task->index_point; point_str << point[0]; - for (int32_t dim = 1; dim < task->index_point.dim; ++dim) point_str << "," << point[dim]; + for (int32_t dim = 1; dim < point.dim; ++dim) point_str << "," << point[dim]; log_legate.print("%s %s task [%s], pt = (%s), proc = " IDFMT, - task_name, + task->get_task_name(), proc_kind_str, task->get_provenance_string().c_str(), point_str.str().c_str(), exec_proc.id); } -/*static*/ void Core::report_unexpected_exception(const char* task_name, +/*static*/ void Core::report_unexpected_exception(const Legion::Task* task, const legate::TaskException& e) { log_legate.error( "Task %s threw an exception \"%s\", but the task did not declare any exception. " "Please specify a Python exception that you want this exception to be re-thrown with " "using 'throws_exception'.", - task_name, + task->get_task_name(), e.error_message().c_str()); LEGATE_ABORT; } -void register_legate_core_tasks(Machine machine, Runtime* runtime, const LibraryContext& context) +namespace detail { + +struct RegistrationCallbackArgs { + Core::RegistrationCallback callback; +}; + +static void invoke_legate_registration_callback(const Legion::RegistrationCallbackArgs& args) +{ + auto p_args = static_cast(args.buffer.get_ptr()); + p_args->callback(); +}; + +} // namespace detail + +/*static*/ void Core::perform_registration(RegistrationCallback callback) +{ + legate::detail::RegistrationCallbackArgs args{callback}; + Legion::UntypedBuffer buffer(&args, sizeof(args)); + Legion::Runtime::perform_registration_callback( + detail::invoke_legate_registration_callback, buffer, true /*global*/); +} + +void register_legate_core_tasks(Legion::Machine machine, + Legion::Runtime* runtime, + const LibraryContext& context) { - const TaskID extract_scalar_task_id = context.get_task_id(LEGATE_CORE_EXTRACT_SCALAR_TASK_ID); + auto extract_scalar_task_id = context.get_task_id(LEGATE_CORE_EXTRACT_SCALAR_TASK_ID); const char* extract_scalar_task_name = "core::extract_scalar"; runtime->attach_name( extract_scalar_task_id, extract_scalar_task_name, false /*mutable*/, true /*local only*/); auto make_registrar = [&](auto task_id, auto* task_name, auto proc_kind) { - TaskVariantRegistrar registrar(task_id, task_name); - registrar.add_constraint(ProcessorConstraint(proc_kind)); + Legion::TaskVariantRegistrar registrar(task_id, task_name); + registrar.add_constraint(Legion::ProcessorConstraint(proc_kind)); registrar.set_leaf(true); registrar.global_registration = false; return registrar; @@ -178,10 +200,11 @@ void register_legate_core_tasks(Machine machine, Runtime* runtime, const Library comm::register_tasks(machine, runtime, context); } -extern void register_exception_reduction_op(Runtime* runtime, const LibraryContext& context); +extern void register_exception_reduction_op(Legion::Runtime* runtime, + const LibraryContext& context); -/*static*/ void core_registration_callback(Machine machine, - Runtime* runtime, +/*static*/ void core_registration_callback(Legion::Machine machine, + Legion::Runtime* runtime, const std::set& local_procs) { ResourceConfig config; @@ -190,7 +213,7 @@ extern void register_exception_reduction_op(Runtime* runtime, const LibraryConte // We register one sharding functor for each new projection functor config.max_shardings = LEGATE_CORE_MAX_FUNCTOR_ID; config.max_reduction_ops = LEGATE_CORE_MAX_REDUCTION_OP_ID; - LibraryContext context(runtime, core_library_name, config); + LibraryContext context(core_library_name, config); register_legate_core_tasks(machine, runtime, context); @@ -203,7 +226,7 @@ extern void register_exception_reduction_op(Runtime* runtime, const LibraryConte register_legate_core_sharding_functors(runtime, context); auto fut = runtime->select_tunable_value( - Runtime::get_context(), LEGATE_CORE_TUNABLE_HAS_SOCKET_MEM, context.get_mapper_id(0)); + Legion::Runtime::get_context(), LEGATE_CORE_TUNABLE_HAS_SOCKET_MEM, context.get_mapper_id(0)); Core::has_socket_mem = fut.get_result(); } diff --git a/src/core/runtime/runtime.h b/src/core/runtime/runtime.h index b7b86c836..723582cc6 100644 --- a/src/core/runtime/runtime.h +++ b/src/core/runtime/runtime.h @@ -27,15 +27,18 @@ extern uint32_t extract_env(const char* env_name, const uint32_t default_value, const uint32_t test_value); -class Core { +struct Core { public: static void parse_config(void); static void shutdown(void); static void show_progress(const Legion::Task* task, Legion::Context ctx, - Legion::Runtime* runtime, - const char* task_name); - static void report_unexpected_exception(const char* task_name, const legate::TaskException& e); + Legion::Runtime* runtime); + static void report_unexpected_exception(const Legion::Task* task, const legate::TaskException& e); + + public: + using RegistrationCallback = void (*)(); + static void perform_registration(RegistrationCallback callback); public: // Configuration settings diff --git a/src/core/runtime/shard.cc b/src/core/runtime/shard.cc index eca6a7775..db2e74434 100644 --- a/src/core/runtime/shard.cc +++ b/src/core/runtime/shard.cc @@ -23,16 +23,16 @@ #include "core/runtime/shard.h" #include "core/utilities/linearize.h" -using namespace Legion; - namespace legate { -static std::unordered_map functor_id_table; +static std::unordered_map functor_id_table; static std::mutex functor_table_lock; -class ToplevelTaskShardingFunctor : public ShardingFunctor { +class ToplevelTaskShardingFunctor : public Legion::ShardingFunctor { public: - virtual ShardID shard(const DomainPoint& p, const Domain& launch_space, const size_t total_shards) + virtual Legion::ShardID shard(const DomainPoint& p, + const Domain& launch_space, + const size_t total_shards) { // Just tile this space in 1D const Point<1> point = p; @@ -43,9 +43,11 @@ class ToplevelTaskShardingFunctor : public ShardingFunctor { } }; -class LinearizingShardingFunctor : public ShardingFunctor { +class LinearizingShardingFunctor : public Legion::ShardingFunctor { public: - virtual ShardID shard(const DomainPoint& p, const Domain& launch_space, const size_t total_shards) + virtual Legion::ShardID shard(const DomainPoint& p, + const Domain& launch_space, + const size_t total_shards) { const size_t size = launch_space.get_volume(); const size_t chunk = (size + total_shards - 1) / total_shards; @@ -54,7 +56,7 @@ class LinearizingShardingFunctor : public ShardingFunctor { virtual bool is_invertible(void) const { return true; } - virtual void invert(ShardID shard, + virtual void invert(Legion::ShardID shard, const Domain& shard_domain, const Domain& full_domain, const size_t total_shards, @@ -95,14 +97,14 @@ void register_legate_core_sharding_functors(Legion::Runtime* runtime, const Libr functor_id_table[context.get_projection_id(LEGATE_CORE_DELINEARIZE_PROJ_ID)] = sharding_id; } -class LegateShardingFunctor : public ShardingFunctor { +class LegateShardingFunctor : public Legion::ShardingFunctor { public: LegateShardingFunctor(LegateProjectionFunctor* proj_functor) : proj_functor_(proj_functor) {} public: - virtual ShardID shard(const DomainPoint& p, - const Domain& launch_space, - const size_t total_shards) override + virtual Legion::ShardID shard(const DomainPoint& p, + const Domain& launch_space, + const size_t total_shards) override { auto lo = proj_functor_->project_point(launch_space.lo(), launch_space); auto hi = proj_functor_->project_point(launch_space.hi(), launch_space); @@ -117,25 +119,25 @@ class LegateShardingFunctor : public ShardingFunctor { LegateProjectionFunctor* proj_functor_; }; -ShardingID find_sharding_functor_by_projection_functor(Legion::ProjectionID proj_id) +Legion::ShardingID find_sharding_functor_by_projection_functor(Legion::ProjectionID proj_id) { const std::lock_guard lock(legate::functor_table_lock); assert(functor_id_table.find(proj_id) != functor_id_table.end()); return functor_id_table[proj_id]; } -struct callback_args_t { +struct ShardingCallbackArgs { Legion::ShardID shard_id; Legion::ProjectionID proj_id; }; static void sharding_functor_registration_callback(const Legion::RegistrationCallbackArgs& args) { - auto p_args = static_cast(args.buffer.get_ptr()); + auto p_args = static_cast(args.buffer.get_ptr()); auto shard_id = p_args->shard_id; auto proj_id = p_args->proj_id; - auto runtime = Runtime::get_runtime(); + auto runtime = Legion::Runtime::get_runtime(); auto sharding_functor = new legate::LegateShardingFunctor(legate::find_legate_projection_functor(proj_id)); runtime->register_sharding_functor(shard_id, sharding_functor, true /*silence warnings*/); @@ -148,13 +150,13 @@ extern "C" { void legate_create_sharding_functor_using_projection(Legion::ShardID shard_id, Legion::ProjectionID proj_id) { - auto runtime = Runtime::get_runtime(); - legate::callback_args_t args{shard_id, proj_id}; + auto runtime = Legion::Runtime::get_runtime(); + legate::ShardingCallbackArgs args{shard_id, proj_id}; { const std::lock_guard lock(legate::functor_table_lock); legate::functor_id_table[proj_id] = shard_id; } - UntypedBuffer buffer(&args, sizeof(args)); + Legion::UntypedBuffer buffer(&args, sizeof(args)); Legion::Runtime::perform_registration_callback( legate::sharding_functor_registration_callback, buffer, false /*global*/, false /*dedup*/); } diff --git a/src/core/task/registrar.cc b/src/core/task/registrar.cc new file mode 100644 index 000000000..3386d2010 --- /dev/null +++ b/src/core/task/registrar.cc @@ -0,0 +1,99 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "core/task/registrar.h" + +#include "core/runtime/context.h" +#include "core/utilities/typedefs.h" + +namespace legate { + +struct PendingTaskVariant : public Legion::TaskVariantRegistrar { + PendingTaskVariant(void) + : Legion::TaskVariantRegistrar(), task_name(nullptr), var(LEGATE_NO_VARIANT) + { + } + PendingTaskVariant(Legion::TaskID tid, + bool global, + const char* var_name, + const char* t_name, + const Legion::CodeDescriptor& desc, + LegateVariantCode v, + size_t ret) + : Legion::TaskVariantRegistrar(tid, global, var_name), + task_name(t_name), + descriptor(desc), + var(v), + ret_size(ret) + { + } + + const char* task_name; + Legion::CodeDescriptor descriptor; + LegateVariantCode var; + size_t ret_size; +}; + +void TaskRegistrar::record_variant(Legion::TaskID tid, + const char* task_name, + const Legion::CodeDescriptor& desc, + Legion::ExecutionConstraintSet& execution_constraints, + Legion::TaskLayoutConstraintSet& layout_constraints, + LegateVariantCode var, + Processor::Kind kind, + const VariantOptions& options) +{ + assert((kind == Processor::LOC_PROC) || (kind == Processor::TOC_PROC) || + (kind == Processor::OMP_PROC)); + + // Buffer these up until we can do our actual registration with the runtime + auto registrar = new PendingTaskVariant(tid, + false /*global*/, + (kind == Processor::LOC_PROC) ? "CPU" + : (kind == Processor::TOC_PROC) ? "GPU" + : "OpenMP", + task_name, + desc, + var, + options.return_size); + + registrar->execution_constraints.swap(execution_constraints); + registrar->layout_constraints.swap(layout_constraints); + registrar->add_constraint(Legion::ProcessorConstraint(kind)); + registrar->set_leaf(options.leaf); + registrar->set_inner(options.inner); + registrar->set_idempotent(options.idempotent); + registrar->set_concurrent(options.concurrent); + + pending_task_variants_.push_back(registrar); +} + +void TaskRegistrar::register_all_tasks(const LibraryContext& context) +{ + auto runtime = Legion::Runtime::get_runtime(); + // Do all our registrations + for (auto& task : pending_task_variants_) { + task->task_id = + context.get_task_id(task->task_id); // Convert a task local task id to a global id + // Attach the task name too for debugging + runtime->attach_name(task->task_id, task->task_name, false /*mutable*/, true /*local only*/); + runtime->register_task_variant(*task, task->descriptor, nullptr, 0, task->ret_size, task->var); + delete task; + } + pending_task_variants_.clear(); +} + +} // namespace legate diff --git a/src/core/task/registrar.h b/src/core/task/registrar.h new file mode 100644 index 000000000..ad7e3ddb3 --- /dev/null +++ b/src/core/task/registrar.h @@ -0,0 +1,49 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include + +#include "legion.h" + +#include "core/task/variant.h" +#include "core/utilities/typedefs.h" + +namespace legate { + +class LibraryContext; +class PendingTaskVariant; + +class TaskRegistrar { + public: + void record_variant(Legion::TaskID tid, + const char* task_name, + const Legion::CodeDescriptor& desc, + Legion::ExecutionConstraintSet& execution_constraints, + Legion::TaskLayoutConstraintSet& layout_constraints, + LegateVariantCode var, + Processor::Kind kind, + const VariantOptions& options); + + public: + void register_all_tasks(const LibraryContext& context); + + private: + std::vector pending_task_variants_; +}; + +} // namespace legate diff --git a/src/core/task/return.cc b/src/core/task/return.cc index 98c70419a..10d69d079 100644 --- a/src/core/task/return.cc +++ b/src/core/task/return.cc @@ -31,8 +31,6 @@ #include "core/cuda/stream_pool.h" #endif -using namespace Legion; - namespace legate { ReturnValue::ReturnValue(Legion::UntypedDeferredValue value, size_t size) @@ -43,7 +41,7 @@ ReturnValue::ReturnValue(Legion::UntypedDeferredValue value, size_t size) /*static*/ ReturnValue ReturnValue::unpack(const void* ptr, size_t size, Memory::Kind memory_kind) { - ReturnValue result(UntypedDeferredValue(size, memory_kind), size); + ReturnValue result(Legion::UntypedDeferredValue(size, memory_kind), size); #ifdef DEBUG_LEGATE assert(!result.is_device_value()); #endif @@ -108,12 +106,14 @@ static void pack_returned_exception(const ReturnedException& value, void*& ptr, value.legion_serialize(ptr); } -static void returned_exception_init(const ReductionOp* reduction_op, void*& ptr, size_t& size) +static void returned_exception_init(const Legion::ReductionOp* reduction_op, + void*& ptr, + size_t& size) { pack_returned_exception(JoinReturnedException::identity, ptr, size); } -static void returned_exception_fold(const ReductionOp* reduction_op, +static void returned_exception_fold(const Legion::ReductionOp* reduction_op, void*& lhs_ptr, size_t& lhs_size, const void* rhs_ptr) @@ -171,7 +171,7 @@ ReturnValue ReturnedException::pack() const { auto buffer_size = legion_buffer_size(); auto mem_kind = find_memory_kind_for_executing_processor(); - auto buffer = UntypedDeferredValue(buffer_size, mem_kind); + auto buffer = Legion::UntypedDeferredValue(buffer_size, mem_kind); AccessorWO acc(buffer, buffer_size, false); legion_serialize(acc.ptr(0)); @@ -296,10 +296,10 @@ void ReturnValues::legion_deserialize(const void* buffer) return ReturnValue::unpack(values + offset, size, kind); } -void ReturnValues::finalize(Context legion_context) const +void ReturnValues::finalize(Legion::Context legion_context) const { if (return_values_.empty()) { - Runtime::legion_task_postamble(legion_context); + Legion::Runtime::legion_task_postamble(legion_context); return; } else if (return_values_.size() == 1) { return_values_.front().finalize(legion_context); @@ -317,17 +317,18 @@ void ReturnValues::finalize(Context legion_context) const size_t return_size = legion_buffer_size(); auto return_buffer = - UntypedDeferredValue(return_size, find_memory_kind_for_executing_processor()); + Legion::UntypedDeferredValue(return_size, find_memory_kind_for_executing_processor()); AccessorWO acc(return_buffer, return_size, false); legion_serialize(acc.ptr(0)); return_buffer.finalize(legion_context); } -void register_exception_reduction_op(Runtime* runtime, const LibraryContext& context) +void register_exception_reduction_op(Legion::Runtime* runtime, const LibraryContext& context) { auto redop_id = context.get_reduction_op_id(LEGATE_CORE_JOIN_EXCEPTION_OP); auto* redop = Realm::ReductionOpUntyped::create_reduction_op(); - Runtime::register_reduction_op(redop_id, redop, returned_exception_init, returned_exception_fold); + Legion::Runtime::register_reduction_op( + redop_id, redop, returned_exception_init, returned_exception_fold); } } // namespace legate diff --git a/src/core/task/return.h b/src/core/task/return.h index 031bb71f2..6b7b9935f 100644 --- a/src/core/task/return.h +++ b/src/core/task/return.h @@ -17,6 +17,7 @@ #pragma once #include +#include "core/utilities/typedefs.h" namespace legate { @@ -29,7 +30,7 @@ struct ReturnValue { ReturnValue& operator=(const ReturnValue&) = default; public: - static ReturnValue unpack(const void* ptr, size_t size, Legion::Memory::Kind memory_kind); + static ReturnValue unpack(const void* ptr, size_t size, Memory::Kind memory_kind); public: void* ptr(); diff --git a/src/core/task/task.cc b/src/core/task/task.cc index 51ac3e1c1..26366c64b 100644 --- a/src/core/task/task.cc +++ b/src/core/task/task.cc @@ -16,54 +16,73 @@ #include "core/task/task.h" -namespace legate { +#include -using namespace Legion; +#include "realm/faults.h" -void LegateTaskRegistrar::record_variant(TaskID tid, - const char* task_name, - const CodeDescriptor& descriptor, - ExecutionConstraintSet& execution_constraints, - TaskLayoutConstraintSet& layout_constraints, - LegateVariantCode var, - Processor::Kind kind, - const VariantOptions& options) -{ - assert((kind == Processor::LOC_PROC) || (kind == Processor::TOC_PROC) || - (kind == Processor::OMP_PROC)); +#include "core/runtime/context.h" +#include "core/runtime/runtime.h" +#include "core/task/exception.h" +#include "core/task/registrar.h" +#include "core/task/return.h" +#include "core/utilities/deserializer.h" +#include "core/utilities/nvtx_help.h" +#include "core/utilities/typedefs.h" - // Buffer these up until we can do our actual registration with the runtime - pending_task_variants_.push_back(PendingTaskVariant(tid, - false /*global*/, - (kind == Processor::LOC_PROC) ? "CPU" - : (kind == Processor::TOC_PROC) ? "GPU" - : "OpenMP", - task_name, - descriptor, - var, - options.return_size)); +namespace legate { +namespace detail { - auto& registrar = pending_task_variants_.back(); - registrar.execution_constraints.swap(execution_constraints); - registrar.layout_constraints.swap(layout_constraints); - registrar.add_constraint(ProcessorConstraint(kind)); - registrar.set_leaf(options.leaf); - registrar.set_inner(options.inner); - registrar.set_idempotent(options.idempotent); - registrar.set_concurrent(options.concurrent); +std::string generate_task_name(const std::type_info& ti) +{ + std::string result; + int status = 0; + char* demangled = abi::__cxa_demangle(ti.name(), 0, 0, &status); + result = demangled; + free(demangled); + return std::move(result); } -void LegateTaskRegistrar::register_all_tasks(Runtime* runtime, LibraryContext& context) +void task_wrapper(VariantImpl variant_impl, + const char* task_name, + const void* args, + size_t arglen, + const void* userdata, + size_t userlen, + Processor p) + { - // Do all our registrations - for (auto& task : pending_task_variants_) { - task.task_id = - context.get_task_id(task.task_id); // Convert a task local task id to a global id - // Attach the task name too for debugging - runtime->attach_name(task.task_id, task.task_name, false /*mutable*/, true /*local only*/); - runtime->register_task_variant(task, task.descriptor, nullptr, 0, task.ret_size, task.var); + // Legion preamble + const Legion::Task* task; + const std::vector* regions; + Legion::Context legion_context; + Legion::Runtime* runtime; + Legion::Runtime::legion_task_preamble(args, arglen, p, task, regions, legion_context, runtime); + +#ifdef LEGATE_USE_CUDA + nvtx::Range auto_range(task_name); +#endif + + Core::show_progress(task, legion_context, runtime); + + TaskContext context(task, *regions, legion_context, runtime); + + ReturnValues return_values{}; + try { + if (!Core::use_empty_task) (*variant_impl)(context); + return_values = context.pack_return_values(); + } catch (legate::TaskException& e) { + if (context.can_raise_exception()) { + context.make_all_unbound_stores_empty(); + return_values = context.pack_return_values_with_exception(e.index(), e.error_message()); + } else + // If a Legate exception is thrown by a task that does not declare any exception, + // this is a bug in the library that needs to be reported to the developer + Core::report_unexpected_exception(task, e); } - pending_task_variants_.clear(); + + // Legion postamble + return_values.finalize(legion_context); } +} // namespace detail } // namespace legate diff --git a/src/core/task/task.h b/src/core/task/task.h index b90a4e86b..76a9d6eb3 100644 --- a/src/core/task/task.h +++ b/src/core/task/task.h @@ -16,293 +16,44 @@ #pragma once -#include -#include - -#include "legion.h" -#include "realm/faults.h" - -#include "core/runtime/context.h" -#include "core/runtime/runtime.h" -#include "core/task/exception.h" -#include "core/task/return.h" -#include "core/utilities/deserializer.h" -#include "core/utilities/nvtx_help.h" +#include "core/task/variant.h" #include "core/utilities/typedefs.h" namespace legate { -// We're going to allow for each task to use only up to 341 scalar output stores -constexpr size_t LEGATE_MAX_SIZE_SCALAR_RETURN = 4096; - -struct VariantOptions { - bool leaf{true}; - bool inner{false}; - bool idempotent{false}; - bool concurrent{false}; - size_t return_size{LEGATE_MAX_SIZE_SCALAR_RETURN}; - - VariantOptions& with_leaf(bool _leaf) - { - leaf = _leaf; - return *this; - } - VariantOptions& with_inner(bool _inner) - { - inner = _inner; - return *this; - } - VariantOptions& with_idempotent(bool _idempotent) - { - idempotent = _idempotent; - return *this; - } - VariantOptions& with_concurrent(bool _concurrent) - { - concurrent = _concurrent; - return *this; - } - VariantOptions& with_return_size(size_t _return_size) - { - return_size = _return_size; - return *this; - } -}; +class TaskContext; -using LegateVariantImpl = void (*)(TaskContext&); +using VariantImpl = void (*)(TaskContext&); template -class LegateTask { - protected: - // Helper class for checking for various kinds of variants - using __no = int8_t[1]; - using __yes = int8_t[2]; - struct HasCPUVariant { - template - static __yes& test(decltype(&U::cpu_variant)); - template - static __no& test(...); - static const bool value = (sizeof(test(0)) == sizeof(__yes)); - }; - struct HasOMPVariant { - template - static __yes& test(decltype(&U::omp_variant)); - template - static __no& test(...); - static const bool value = (sizeof(test(0)) == sizeof(__yes)); - }; - struct HasGPUVariant { - template - static __yes& test(decltype(&U::gpu_variant)); - template - static __no& test(...); - static const bool value = (sizeof(test(0)) == sizeof(__yes)); - }; +struct LegateTask { + // Exports the base class so we can access it via subclass T + using BASE = LegateTask; - public: - static const char* task_name() - { - static std::string result; - if (result.empty()) { - int status = 0; - char* demangled = abi::__cxa_demangle(typeid(T).name(), 0, 0, &status); - result = demangled; - free(demangled); - } + static void register_variants( + const std::map& all_options = {}); - return result.c_str(); - } + private: + template typename, bool> + friend struct detail::RegisterVariantImpl; - // Task wrappers so we can instrument all Legate tasks if we want - template + // A wrapper that wraps all Legate task variant implementations. Provides + // common functionalities and instrumentations + template static void legate_task_wrapper( - const void* args, size_t arglen, const void* userdata, size_t userlen, Legion::Processor p) - { - // Legion preamble - const Legion::Task* task; - const std::vector* regions; - Legion::Context legion_context; - Legion::Runtime* runtime; - Legion::Runtime::legion_task_preamble(args, arglen, p, task, regions, legion_context, runtime); - -#ifdef LEGATE_USE_CUDA - nvtx::Range auto_range(task_name()); -#endif - - Core::show_progress(task, legion_context, runtime, task_name()); + const void* args, size_t arglen, const void* userdata, size_t userlen, Processor p); - TaskContext context(task, *regions, legion_context, runtime); - - ReturnValues return_values{}; - try { - if (!Core::use_empty_task) (*TASK_PTR)(context); - return_values = context.pack_return_values(); - } catch (legate::TaskException& e) { - if (context.can_raise_exception()) { - context.make_all_unbound_stores_empty(); - return_values = context.pack_return_values_with_exception(e.index(), e.error_message()); - } else - // If a Legate exception is thrown by a task that does not declare any exception, - // this is a bug in the library that needs to be reported to the developer - Core::report_unexpected_exception(task_name(), e); - } - - // Legion postamble - return_values.finalize(legion_context); - } - - public: - // Methods for registering variants - template + // A helper to register a single task variant + template static void register_variant(Legion::ExecutionConstraintSet& execution_constraints, Legion::TaskLayoutConstraintSet& layout_constraints, LegateVariantCode var, - Legion::Processor::Kind kind, - const VariantOptions& options) - { - // Construct the code descriptor for this task so that the library - // can register it later when it is ready - Legion::CodeDescriptor desc(legate_task_wrapper); - auto task_id = T::TASK_ID; - - T::Registrar::record_variant( - task_id, T::task_name(), desc, execution_constraints, layout_constraints, var, kind, options); - } - static void register_variants( - const std::map& all_options = {}); -}; - -template -class RegisterCPUVariant { - public: - static void register_variant(const VariantOptions& options) - { - Legion::ExecutionConstraintSet execution_constraints; - Legion::TaskLayoutConstraintSet layout_constraints; - BASE::template register_variant(execution_constraints, - layout_constraints, - LEGATE_CPU_VARIANT, - Legion::Processor::LOC_PROC, - options); - } -}; + Processor::Kind kind, + const VariantOptions& options); -template -class RegisterCPUVariant { - public: - static void register_variant(const VariantOptions& options) - { - // Do nothing - } -}; - -template -class RegisterOMPVariant { - public: - static void register_variant(const VariantOptions& options) - { - Legion::ExecutionConstraintSet execution_constraints; - Legion::TaskLayoutConstraintSet layout_constraints; - BASE::template register_variant(execution_constraints, - layout_constraints, - LEGATE_OMP_VARIANT, - Legion::Processor::OMP_PROC, - options); - } -}; - -template -class RegisterOMPVariant { - public: - static void register_variant(const VariantOptions& options) - { - // Do nothing - } -}; - -template -class RegisterGPUVariant { - public: - static void register_variant(const VariantOptions& options) - { - Legion::ExecutionConstraintSet execution_constraints; - Legion::TaskLayoutConstraintSet layout_constraints; - BASE::template register_variant(execution_constraints, - layout_constraints, - LEGATE_GPU_VARIANT, - Legion::Processor::TOC_PROC, - options); - } -}; - -template -class RegisterGPUVariant { - public: - static void register_variant(const VariantOptions& options) - { - // Do nothing - } -}; - -template -/*static*/ void LegateTask::register_variants( - const std::map& all_options) -{ - // Make a copy of the map of options so that we can do find-or-create on it - auto all_options_copy = all_options; - RegisterCPUVariant, HasCPUVariant::value>::register_variant( - all_options_copy[LEGATE_CPU_VARIANT]); - RegisterOMPVariant, HasOMPVariant::value>::register_variant( - all_options_copy[LEGATE_OMP_VARIANT]); - RegisterGPUVariant, HasGPUVariant::value>::register_variant( - all_options_copy[LEGATE_GPU_VARIANT]); -} - -class LegateTaskRegistrar { - public: - void record_variant(Legion::TaskID tid, - const char* task_name, - const Legion::CodeDescriptor& desc, - Legion::ExecutionConstraintSet& execution_constraints, - Legion::TaskLayoutConstraintSet& layout_constraints, - LegateVariantCode var, - Legion::Processor::Kind kind, - const VariantOptions& options); - - public: - void register_all_tasks(Legion::Runtime* runtime, LibraryContext& context); - - private: - struct PendingTaskVariant : public Legion::TaskVariantRegistrar { - public: - PendingTaskVariant(void) - : Legion::TaskVariantRegistrar(), task_name(nullptr), var(LEGATE_NO_VARIANT) - { - } - PendingTaskVariant(Legion::TaskID tid, - bool global, - const char* var_name, - const char* t_name, - const Legion::CodeDescriptor& desc, - LegateVariantCode v, - size_t ret) - : Legion::TaskVariantRegistrar(tid, global, var_name), - task_name(t_name), - descriptor(desc), - var(v), - ret_size(ret) - { - } - - public: - const char* task_name; - Legion::CodeDescriptor descriptor; - LegateVariantCode var; - size_t ret_size; - }; - - private: - std::vector pending_task_variants_; + static const char* task_name(); }; } // namespace legate + +#include "core/task/task.inl" diff --git a/src/core/task/task.inl b/src/core/task/task.inl new file mode 100644 index 000000000..713cc3efd --- /dev/null +++ b/src/core/task/task.inl @@ -0,0 +1,79 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "core/task/task.h" + +namespace legate { + +namespace detail { + +std::string generate_task_name(const std::type_info&); + +void task_wrapper( + VariantImpl, const char*, const void*, size_t, const void*, size_t, Legion::Processor); + +}; // namespace detail + +template +template +/*static*/ void LegateTask::legate_task_wrapper( + const void* args, size_t arglen, const void* userdata, size_t userlen, Legion::Processor p) +{ + detail::task_wrapper(VARIANT_IMPL, task_name(), args, arglen, userdata, userlen, p); +} + +template +template +/*static*/ void LegateTask::register_variant( + Legion::ExecutionConstraintSet& execution_constraints, + Legion::TaskLayoutConstraintSet& layout_constraints, + LegateVariantCode var, + Legion::Processor::Kind kind, + const VariantOptions& options) +{ + // Construct the code descriptor for this task so that the library + // can register it later when it is ready + Legion::CodeDescriptor desc(legate_task_wrapper); + auto task_id = T::TASK_ID; + + T::Registrar::record_variant( + task_id, task_name(), desc, execution_constraints, layout_constraints, var, kind, options); +} + +template +/*static*/ void LegateTask::register_variants( + const std::map& all_options) +{ + // Make a copy of the map of options so that we can do find-or-create on it + auto all_options_copy = all_options; + detail::RegisterVariant::register_variant( + all_options_copy[LEGATE_CPU_VARIANT]); + detail::RegisterVariant::register_variant( + all_options_copy[LEGATE_OMP_VARIANT]); + detail::RegisterVariant::register_variant( + all_options_copy[LEGATE_GPU_VARIANT]); +} + +template +/*static*/ const char* LegateTask::task_name() +{ + static std::string result = detail::generate_task_name(typeid(T)); + return result.c_str(); +} + +} // namespace legate diff --git a/src/core/task/variant.cc b/src/core/task/variant.cc new file mode 100644 index 000000000..ea54b414e --- /dev/null +++ b/src/core/task/variant.cc @@ -0,0 +1,51 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "core/task/variant.h" + +namespace legate { + +VariantOptions& VariantOptions::with_leaf(bool _leaf) +{ + leaf = _leaf; + return *this; +} + +VariantOptions& VariantOptions::with_inner(bool _inner) +{ + inner = _inner; + return *this; +} + +VariantOptions& VariantOptions::with_idempotent(bool _idempotent) +{ + idempotent = _idempotent; + return *this; +} + +VariantOptions& VariantOptions::with_concurrent(bool _concurrent) +{ + concurrent = _concurrent; + return *this; +} + +VariantOptions& VariantOptions::with_return_size(size_t _return_size) +{ + return_size = _return_size; + return *this; +} + +} // namespace legate diff --git a/src/core/task/variant.h b/src/core/task/variant.h new file mode 100644 index 000000000..53c350302 --- /dev/null +++ b/src/core/task/variant.h @@ -0,0 +1,106 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "legion.h" + +#include "core/utilities/typedefs.h" + +namespace legate { + +// We're going to allow for each task to use only up to 341 scalar output stores +constexpr size_t LEGATE_MAX_SIZE_SCALAR_RETURN = 4096; + +struct VariantOptions { + bool leaf{true}; + bool inner{false}; + bool idempotent{false}; + bool concurrent{false}; + size_t return_size{LEGATE_MAX_SIZE_SCALAR_RETURN}; + + VariantOptions& with_leaf(bool leaf); + VariantOptions& with_inner(bool inner); + VariantOptions& with_idempotent(bool idempotent); + VariantOptions& with_concurrent(bool concurrent); + VariantOptions& with_return_size(size_t return_size); +}; + +namespace detail { + +template +using void_t = void; + +template +struct CPUVariant : std::false_type {}; + +template +struct OMPVariant : std::false_type {}; + +template +struct GPUVariant : std::false_type {}; + +template +struct CPUVariant> : std::true_type { + static constexpr auto variant = T::cpu_variant; + static constexpr auto id = LEGATE_CPU_VARIANT; + static constexpr auto proc_kind = Processor::LOC_PROC; +}; + +template +struct OMPVariant> : std::true_type { + static constexpr auto variant = T::omp_variant; + static constexpr auto id = LEGATE_OMP_VARIANT; + static constexpr auto proc_kind = Processor::OMP_PROC; +}; + +template +struct GPUVariant> : std::true_type { + static constexpr auto variant = T::gpu_variant; + static constexpr auto id = LEGATE_GPU_VARIANT; + static constexpr auto proc_kind = Processor::TOC_PROC; +}; + +template typename SELECTOR, bool HAS_VARIANT> +struct RegisterVariantImpl { + static void register_variant(const VariantOptions& options) + { + Legion::ExecutionConstraintSet execution_constraints; + Legion::TaskLayoutConstraintSet layout_constraints; + T::BASE::template register_variant::variant>( + execution_constraints, layout_constraints, SELECTOR::id, SELECTOR::proc_kind, options); + } +}; + +template typename SELECTOR> +struct RegisterVariantImpl { + static void register_variant(const VariantOptions& options) + { + // Do nothing + } +}; + +template typename SELECTOR> +struct RegisterVariant { + static void register_variant(const VariantOptions& options) + { + RegisterVariantImpl::value>::register_variant(options); + } +}; + +} // namespace detail + +} // namespace legate diff --git a/src/core/utilities/debug.h b/src/core/utilities/debug.h index e6f52897f..d0f803fe5 100644 --- a/src/core/utilities/debug.h +++ b/src/core/utilities/debug.h @@ -17,6 +17,7 @@ #pragma once #include "core/data/store.h" +#include "core/utilities/typedefs.h" #ifdef LEGATE_USE_CUDA #include @@ -26,8 +27,6 @@ namespace legate { -using namespace Legion; - #ifdef LEGATE_USE_CUDA #ifndef MAX diff --git a/src/core/utilities/deserializer.cc b/src/core/utilities/deserializer.cc index f62f5b50a..325b0d2d4 100644 --- a/src/core/utilities/deserializer.cc +++ b/src/core/utilities/deserializer.cc @@ -18,26 +18,22 @@ #include "core/data/scalar.h" #include "core/data/store.h" #include "core/utilities/machine.h" +#include "core/utilities/typedefs.h" #include "legion/legion_c.h" #include "legion/legion_c_util.h" -using LegionTask = Legion::Task; - -using namespace Legion; -using namespace Legion::Mapping; - namespace legate { -TaskDeserializer::TaskDeserializer(const LegionTask* task, - const std::vector& regions) +TaskDeserializer::TaskDeserializer(const Legion::Task* task, + const std::vector& regions) : BaseDeserializer(static_cast(task->args), task->arglen), futures_{task->futures.data(), task->futures.size()}, regions_{regions.data(), regions.size()}, outputs_() { - auto runtime = Runtime::get_runtime(); - auto ctx = Runtime::get_context(); + auto runtime = Legion::Runtime::get_runtime(); + auto ctx = Legion::Runtime::get_context(); runtime->get_output_regions(ctx, outputs_); first_task_ = !task->is_index_space || (task->index_point == task->index_domain.lo()); @@ -76,7 +72,7 @@ void TaskDeserializer::_unpack(FutureWrapper& value) auto field_size = unpack(); auto point = unpack>(); - Legion::Domain domain; + Domain domain; domain.dim = static_cast(point.size()); for (int32_t idx = 0; idx < domain.dim; ++idx) { domain.rect_data[idx] = 0; @@ -122,14 +118,14 @@ void TaskDeserializer::_unpack(Legion::PhaseBarrier& barrier) auto future = futures_[0]; futures_ = futures_.subspan(1); auto barrier_ = future.get_result(); - barrier = CObjectWrapper::unwrap(barrier_); + barrier = Legion::CObjectWrapper::unwrap(barrier_); } namespace mapping { TaskDeserializer::TaskDeserializer(const Legion::Task* task, - MapperRuntime* runtime, - MapperContext context) + Legion::Mapping::MapperRuntime* runtime, + Legion::Mapping::MapperContext context) : BaseDeserializer(static_cast(task->args), task->arglen), task_(task), runtime_(runtime), @@ -170,7 +166,7 @@ void TaskDeserializer::_unpack(FutureWrapper& value) unpack(); auto point = unpack>(); - Legion::Domain domain; + Domain domain; domain.dim = static_cast(point.size()); for (int32_t idx = 0; idx < domain.dim; ++idx) { domain.rect_data[idx] = 0; @@ -193,8 +189,8 @@ void TaskDeserializer::_unpack(RegionField& value, bool is_output_region) CopyDeserializer::CopyDeserializer(const void* args, size_t arglen, std::vector&& all_requirements, - MapperRuntime* runtime, - MapperContext context) + Legion::Mapping::MapperRuntime* runtime, + Legion::Mapping::MapperContext context) : BaseDeserializer(static_cast(args), arglen), all_reqs_(std::forward>(all_requirements)), curr_reqs_(all_reqs_.begin()), diff --git a/src/core/utilities/dispatch.h b/src/core/utilities/dispatch.h index c2e849f24..8d3d2f302 100644 --- a/src/core/utilities/dispatch.h +++ b/src/core/utilities/dispatch.h @@ -86,42 +86,42 @@ struct inner_dim_dispatch_fn { case 1: { return f.template operator()(std::forward(args)...); } -#if LEGION_MAX_DIM >= 2 +#if LEGATE_MAX_DIM >= 2 case 2: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 3 +#if LEGATE_MAX_DIM >= 3 case 3: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 4 +#if LEGATE_MAX_DIM >= 4 case 4: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 5 +#if LEGATE_MAX_DIM >= 5 case 5: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 6 +#if LEGATE_MAX_DIM >= 6 case 6: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 7 +#if LEGATE_MAX_DIM >= 7 case 7: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 8 +#if LEGATE_MAX_DIM >= 8 case 8: { return f.template operator()(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 9 +#if LEGATE_MAX_DIM >= 9 case 9: { return f.template operator()(std::forward(args)...); } @@ -136,47 +136,47 @@ template constexpr decltype(auto) double_dispatch(int dim, LegateTypeCode code, Functor f, Fnargs&&... args) { switch (dim) { -#if LEGION_MAX_DIM >= 1 +#if LEGATE_MAX_DIM >= 1 case 1: { return inner_type_dispatch_fn<1>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 2 +#if LEGATE_MAX_DIM >= 2 case 2: { return inner_type_dispatch_fn<2>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 3 +#if LEGATE_MAX_DIM >= 3 case 3: { return inner_type_dispatch_fn<3>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 4 +#if LEGATE_MAX_DIM >= 4 case 4: { return inner_type_dispatch_fn<4>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 5 +#if LEGATE_MAX_DIM >= 5 case 5: { return inner_type_dispatch_fn<5>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 6 +#if LEGATE_MAX_DIM >= 6 case 6: { return inner_type_dispatch_fn<6>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 7 +#if LEGATE_MAX_DIM >= 7 case 7: { return inner_type_dispatch_fn<7>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 8 +#if LEGATE_MAX_DIM >= 8 case 8: { return inner_type_dispatch_fn<8>{}(code, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 9 +#if LEGATE_MAX_DIM >= 9 case 9: { return inner_type_dispatch_fn<9>{}(code, f, std::forward(args)...); } @@ -190,47 +190,47 @@ template constexpr decltype(auto) double_dispatch(int dim1, int dim2, Functor f, Fnargs&&... args) { switch (dim1) { -#if LEGION_MAX_DIM >= 1 +#if LEGATE_MAX_DIM >= 1 case 1: { return inner_dim_dispatch_fn<1>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 2 +#if LEGATE_MAX_DIM >= 2 case 2: { return inner_dim_dispatch_fn<2>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 3 +#if LEGATE_MAX_DIM >= 3 case 3: { return inner_dim_dispatch_fn<3>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 4 +#if LEGATE_MAX_DIM >= 4 case 4: { return inner_dim_dispatch_fn<4>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 5 +#if LEGATE_MAX_DIM >= 5 case 5: { return inner_dim_dispatch_fn<5>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 6 +#if LEGATE_MAX_DIM >= 6 case 6: { return inner_dim_dispatch_fn<6>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 7 +#if LEGATE_MAX_DIM >= 7 case 7: { return inner_dim_dispatch_fn<7>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 8 +#if LEGATE_MAX_DIM >= 8 case 8: { return inner_dim_dispatch_fn<8>{}(dim2, f, std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 9 +#if LEGATE_MAX_DIM >= 9 case 9: { return inner_dim_dispatch_fn<9>{}(dim2, f, std::forward(args)...); } @@ -244,47 +244,47 @@ template constexpr decltype(auto) dim_dispatch(int dim, Functor f, Fnargs&&... args) { switch (dim) { -#if LEGION_MAX_DIM >= 1 +#if LEGATE_MAX_DIM >= 1 case 1: { return f.template operator()<1>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 2 +#if LEGATE_MAX_DIM >= 2 case 2: { return f.template operator()<2>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 3 +#if LEGATE_MAX_DIM >= 3 case 3: { return f.template operator()<3>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 4 +#if LEGATE_MAX_DIM >= 4 case 4: { return f.template operator()<4>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 5 +#if LEGATE_MAX_DIM >= 5 case 5: { return f.template operator()<5>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 6 +#if LEGATE_MAX_DIM >= 6 case 6: { return f.template operator()<6>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 7 +#if LEGATE_MAX_DIM >= 7 case 7: { return f.template operator()<7>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 8 +#if LEGATE_MAX_DIM >= 8 case 8: { return f.template operator()<8>(std::forward(args)...); } #endif -#if LEGION_MAX_DIM >= 9 +#if LEGATE_MAX_DIM >= 9 case 9: { return f.template operator()<9>(std::forward(args)...); } diff --git a/src/core/utilities/linearize.cc b/src/core/utilities/linearize.cc index 5223fd2fb..02ed39ac2 100644 --- a/src/core/utilities/linearize.cc +++ b/src/core/utilities/linearize.cc @@ -19,8 +19,6 @@ namespace legate { -using namespace Legion; - struct linearize_fn { template size_t operator()(const DomainPoint& lo_dp, const DomainPoint& hi_dp, const DomainPoint& point_dp) diff --git a/src/core/utilities/linearize.h b/src/core/utilities/linearize.h index 70a016521..a8d1720ac 100644 --- a/src/core/utilities/linearize.h +++ b/src/core/utilities/linearize.h @@ -16,16 +16,12 @@ #pragma once -#include "legion.h" +#include "core/utilities/typedefs.h" namespace legate { -size_t linearize(const Legion::DomainPoint& lo, - const Legion::DomainPoint& hi, - const Legion::DomainPoint& point); +size_t linearize(const DomainPoint& lo, const DomainPoint& hi, const DomainPoint& point); -Legion::DomainPoint delinearize(const Legion::DomainPoint& lo, - const Legion::DomainPoint& hi, - size_t idx); +DomainPoint delinearize(const DomainPoint& lo, const DomainPoint& hi, size_t idx); } // namespace legate diff --git a/src/core/utilities/machine.cc b/src/core/utilities/machine.cc index a6c5e7e5e..ca79637e9 100644 --- a/src/core/utilities/machine.cc +++ b/src/core/utilities/machine.cc @@ -19,8 +19,6 @@ #include "core/runtime/runtime.h" #include "legate_defines.h" -using namespace Legion; - namespace legate { Memory::Kind find_memory_kind_for_executing_processor(bool host_accessible) diff --git a/src/core/utilities/machine.h b/src/core/utilities/machine.h index b61824542..5e0cd9d60 100644 --- a/src/core/utilities/machine.h +++ b/src/core/utilities/machine.h @@ -18,8 +18,10 @@ #include "legion.h" +#include "core/utilities/typedefs.h" + namespace legate { -Legion::Memory::Kind find_memory_kind_for_executing_processor(bool host_accessible = true); +Memory::Kind find_memory_kind_for_executing_processor(bool host_accessible = true); } // namespace legate diff --git a/src/core/utilities/typedefs.h b/src/core/utilities/typedefs.h index f33bcff79..7ab90cb75 100644 --- a/src/core/utilities/typedefs.h +++ b/src/core/utilities/typedefs.h @@ -19,32 +19,80 @@ #include "legion.h" #include "core/legate_c.h" +#include "legate_defines.h" namespace legate { -extern Legion::Logger log_legate; +// C enum typedefs +using LegateVariantCode = legate_core_variant_t; +using LegateTypeCode = legate_core_type_code_t; +using LegateMappingTag = legate_core_mapping_tag_t; + +using Logger = Legion::Logger; + +extern Logger log_legate; + +// Re-export Legion types + +using TunableID = Legion::TunableID; + +// Geometry types + +using coord_t = Legion::coord_t; + +template +using Point = Legion::Point; +template +using Rect = Legion::Rect; -template +using Domain = Legion::Domain; +using DomainPoint = Legion::DomainPoint; + +// Accessor types + +template using AccessorRO = Legion::FieldAccessor>; -template +template using AccessorWO = Legion::FieldAccessor>; -template +template using AccessorRW = Legion::FieldAccessor>; -template +template using AccessorRD = Legion:: ReductionAccessor>; -template -using GenericAccessorRO = Legion::FieldAccessor; -template -using GenericAccessorWO = Legion::FieldAccessor; -template -using GenericAccessorRW = Legion::FieldAccessor; -using TunableID = Legion::TunableID; +// Iterators -// C enum typedefs -using LegateVariantCode = legate_core_variant_t; -using LegateTypeCode = legate_core_type_code_t; -using LegateMappingTag = legate_core_mapping_tag_t; +template +using PointInRectIterator = Legion::PointInRectIterator; +template +using RectInDomainIterator = Legion::RectInDomainIterator; +template +using PointInDomainIterator = Legion::PointInDomainIterator; + +// Machine + +using Processor = Legion::Processor; +using Memory = Legion::Memory; + +// Reduction operators + +template +using SumReduction = Legion::SumReduction; +template +using DiffReduction = Legion::DiffReduction; +template +using ProdReduction = Legion::ProdReduction; +template +using DivReduction = Legion::DivReduction; +template +using MaxReduction = Legion::MaxReduction; +template +using MinReduction = Legion::MinReduction; +template +using OrReduction = Legion::OrReduction; +template +using AndReduction = Legion::AndReduction; +template +using XorReduction = Legion::XorReduction; } // namespace legate diff --git a/src/legate.h b/src/legate.h index 14171e1c1..7e53e9a45 100644 --- a/src/legate.h +++ b/src/legate.h @@ -23,6 +23,7 @@ #include "core/data/store.h" #include "core/legate_c.h" #include "core/runtime/runtime.h" +#include "core/task/registrar.h" #include "core/task/task.h" #include "core/utilities/deserializer.h" #include "core/utilities/dispatch.h" diff --git a/src/legate_defines.h b/src/legate_defines.h index de272dde0..7c08bc3b1 100644 --- a/src/legate_defines.h +++ b/src/legate_defines.h @@ -52,5 +52,11 @@ #endif #endif +#ifdef LEGION_BOUNDS_CHECKS +#define LEGATE_BOUNDS_CHECKS +#endif + +#define LEGATE_MAX_DIM LEGION_MAX_DIM + // TODO: 2022-10-04: Work around a Legion bug, by not instantiating futures on framebuffer. #define LEGATE_NO_FUTURES_ON_FB