Skip to content

Commit

Permalink
fix cpu communicator for omp (nv-legate#352)
Browse files Browse the repository at this point in the history
* fix cpu communicator for omp

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix for pre-commit

* add tunable variables (num cpus/omps/gpus) to Runtime

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
eddy16112 and pre-commit-ci[bot] authored Aug 31, 2022
1 parent 3345086 commit 7a858e0
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 1 deletion.
5 changes: 4 additions & 1 deletion legate/core/communicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ def __init__(self, runtime: Runtime) -> None:
)
self._init_cpucoll = library.LEGATE_CORE_INIT_CPUCOLL_TASK_ID
self._finalize_cpucoll = library.LEGATE_CORE_FINALIZE_CPUCOLL_TASK_ID
self._tag = library.LEGATE_CPU_VARIANT
if runtime.num_omps > 0:
self._tag = library.LEGATE_OMP_VARIANT
else:
self._tag = library.LEGATE_CPU_VARIANT
self._needs_barrier = False

def destroy(self) -> None:
Expand Down
31 changes: 31 additions & 0 deletions legate/core/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,25 @@ def __init__(self, core_library: CoreLib) -> None:
)
)

self._num_cpus = int(
self._core_context.get_tunable(
legion.LEGATE_CORE_TUNABLE_TOTAL_CPUS,
ty.int32,
)
)
self._num_omps = int(
self._core_context.get_tunable(
legion.LEGATE_CORE_TUNABLE_TOTAL_OMPS,
ty.int32,
)
)
self._num_gpus = int(
self._core_context.get_tunable(
legion.LEGATE_CORE_TUNABLE_TOTAL_GPUS,
ty.int32,
)
)

# Now we initialize managers
self._attachment_manager = AttachmentManager(self)
self._partition_manager = PartitionManager(self)
Expand Down Expand Up @@ -935,6 +954,18 @@ def core_library(self) -> Any:
def empty_argmap(self) -> ArgumentMap:
return self._empty_argmap

# Read-only accessor for the CPU count cached in self._num_cpus, which
# __init__ fills in from the LEGATE_CORE_TUNABLE_TOTAL_CPUS tunable.
# NOTE(review): presumably the machine-wide total (per-node count times
# node count, as the mapper does for OMPs/GPUs) -- confirm in the mapper.
@property
def num_cpus(self) -> int:
return self._num_cpus

# Read-only accessor for the OpenMP processor count cached in
# self._num_omps, which __init__ fills in from the
# LEGATE_CORE_TUNABLE_TOTAL_OMPS tunable. The core mapper answers that
# tunable with local_omps.size() * total_nodes (it assumes symmetric nodes).
# The CPU communicator checks this value (> 0) to pick the OMP task variant.
@property
def num_omps(self) -> int:
return self._num_omps

# Read-only accessor for the GPU count cached in self._num_gpus, which
# __init__ fills in from the LEGATE_CORE_TUNABLE_TOTAL_GPUS tunable.
# NOTE(review): the mapper appears to report local_gpus.size() * total_nodes
# for this tunable (assuming symmetric nodes) -- confirm against the full
# select_tunable_value switch.
@property
def num_gpus(self) -> int:
return self._num_gpus

# Read-only accessor for the AttachmentManager that __init__ constructs
# (AttachmentManager(self)) and stores in self._attachment_manager.
@property
def attachment_manager(self) -> AttachmentManager:
return self._attachment_manager
Expand Down
15 changes: 15 additions & 0 deletions src/core/comm/comm_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,21 @@ void register_tasks(Legion::Machine machine,
make_registrar(finalize_cpucoll_task_id, finalize_cpucoll_task_name, Processor::LOC_PROC);
runtime->register_task_variant<finalize_cpucoll>(registrar, LEGATE_CPU_VARIANT);
}
{
auto registrar = make_registrar(
init_cpucoll_mapping_task_id, init_cpucoll_mapping_task_name, Processor::OMP_PROC);
runtime->register_task_variant<int, init_cpucoll_mapping>(registrar, LEGATE_OMP_VARIANT);
}
{
auto registrar =
make_registrar(init_cpucoll_task_id, init_cpucoll_task_name, Processor::OMP_PROC);
runtime->register_task_variant<coll::CollComm, init_cpucoll>(registrar, LEGATE_OMP_VARIANT);
}
{
auto registrar =
make_registrar(finalize_cpucoll_task_id, finalize_cpucoll_task_name, Processor::OMP_PROC);
runtime->register_task_variant<finalize_cpucoll>(registrar, LEGATE_OMP_VARIANT);
}
}

} // namespace cpu
Expand Down
1 change: 1 addition & 0 deletions src/core/legate_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef enum legate_core_shard_id_t {

typedef enum legate_core_tunable_t {
LEGATE_CORE_TUNABLE_TOTAL_CPUS = 12345,
LEGATE_CORE_TUNABLE_TOTAL_OMPS,
LEGATE_CORE_TUNABLE_TOTAL_GPUS,
LEGATE_CORE_TUNABLE_NUM_PIECES,
LEGATE_CORE_TUNABLE_MIN_SHARD_VOLUME,
Expand Down
19 changes: 19 additions & 0 deletions src/core/mapping/core_mapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ void CoreMapper::select_task_options(const MapperContext ctx, const Task& task,
if (task.tag == LEGATE_CPU_VARIANT) {
assert(!local_cpus.empty());
output.initial_proc = local_cpus.front();
} else if (task.tag == LEGATE_OMP_VARIANT) {
assert(!local_omps.empty());
output.initial_proc = local_omps.front();
} else {
assert(task.tag == LEGATE_GPU_VARIANT);
assert(!local_gpus.empty());
Expand Down Expand Up @@ -296,6 +299,18 @@ void CoreMapper::slice_task(const MapperContext ctx,
}
break;
}
case Processor::OMP_PROC: {
for (Domain::DomainPointIterator itr(input.domain); itr; itr++) {
const Point<1> point = itr.p;
assert(point[0] >= start);
assert(point[0] < (start + chunk));
const unsigned local_index = point[0] - start;
assert(local_index < local_omps.size());
output.slices.push_back(TaskSlice(
Domain(itr.p, itr.p), local_omps[local_index], false /*recurse*/, false /*stealable*/));
}
break;
}
default: LEGATE_ABORT;
}
}
Expand Down Expand Up @@ -375,6 +390,10 @@ void CoreMapper::select_tunable_value(const MapperContext ctx,
pack_tunable<int32_t>(local_gpus.size() * total_nodes, output); // assume symmetry
return;
}
case LEGATE_CORE_TUNABLE_TOTAL_OMPS: {
pack_tunable<int32_t>(local_omps.size() * total_nodes, output); // assume symmetry
return;
}
case LEGATE_CORE_TUNABLE_NUM_PIECES: {
if (!local_gpus.empty()) // If we have GPUs, use those
pack_tunable<int32_t>(local_gpus.size() * total_nodes, output);
Expand Down
3 changes: 3 additions & 0 deletions typings/legion_cffi/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ LEGATE_CORE_TUNABLE_FIELD_REUSE_SIZE: int
LEGATE_CORE_TUNABLE_FIELD_REUSE_FREQUENCY: int
LEGATE_CORE_TUNABLE_MAX_PENDING_EXCEPTIONS: int
LEGATE_CORE_TUNABLE_PRECISE_EXCEPTION_TRACE: int
LEGATE_CORE_TUNABLE_TOTAL_CPUS: int
LEGATE_CORE_TUNABLE_TOTAL_OMPS: int
LEGATE_CORE_TUNABLE_TOTAL_GPUS: int
LEGATE_CORE_TUNABLE_NUM_PIECES: int
LEGATE_CORE_TUNABLE_MIN_SHARD_VOLUME: int
LEGATE_CORE_TUNABLE_NCCL_NEEDS_BARRIER: int
Expand Down

0 comments on commit 7a858e0

Please sign in to comment.