Skip to content

Commit

Permalink
comm: add facility to store persistent requests in hash
Browse files Browse the repository at this point in the history
Maintain a per-comm hash for persistent requests so we can check and
free them at MPI_Comm_disconnect or MPI_Finalize if the user forgets to free
them.

We need to apply per-VCI critical sections to prevent data races.

Reference: mpi-forum/mpi-issues#710.
  • Loading branch information
raffenet authored and hzhou committed Jul 15, 2023
1 parent d04502e commit d670af6
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/include/mpir_comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ struct MPIR_Comm {
} multiplex;
} stream_comm;

MPIR_Request *persistent_requests;

/* Other, device-specific information */
#ifdef MPID_DEV_COMM_DECL
MPID_DEV_COMM_DECL
Expand Down Expand Up @@ -369,6 +371,10 @@ int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_
#define MPIR_Comm_rank(comm_ptr) ((comm_ptr)->rank)
#define MPIR_Comm_size(comm_ptr) ((comm_ptr)->local_size)

/* Per-communicator tracking of persistent requests.
 *
 * Each communicator keeps a hash (comm->persistent_requests) of requests,
 * keyed by the request's integer handle. All three routines update or walk
 * that hash between MPID_THREAD_CS_ENTER/EXIT(VCI, comm->mutex) and return
 * MPI_SUCCESS. */

/* Add `request` to the hash of `comm`. */
int MPIR_Comm_save_inactive_request(MPIR_Comm * comm, MPIR_Request * request);
/* Remove `request` from the hash of `comm`. */
int MPIR_Comm_delete_inactive_request(MPIR_Comm * comm, MPIR_Request * request);
/* Walk the hash of `comm`; for every request that is not active, print a
 * warning and free it with MPIR_Request_free_impl. */
int MPIR_Comm_free_inactive_requests(MPIR_Comm * comm);

/* Comm hint registration.
*
* Hint function is optional. If it is NULL, MPIR_layer will set corresponding
Expand Down
1 change: 1 addition & 0 deletions src/include/mpir_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ struct MPIR_Request {
#endif /* HAVE_DEBUGGER_SUPPORT */

struct MPIR_Request *next, *prev;
UT_hash_handle hh;

/* Other, device-specific information */
#ifdef MPID_DEV_REQUEST_DECL
Expand Down
36 changes: 36 additions & 0 deletions src/mpi/comm/commutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,8 @@ int MPII_Comm_init(MPIR_Comm * comm_p)
comm_p->threadcomm = NULL;
MPIR_stream_comm_init(comm_p);

comm_p->persistent_requests = NULL;

/* mutex is only used in VCI granularity. But the overhead of
* creation is low, so we always create it. */
{
Expand Down Expand Up @@ -1338,3 +1340,37 @@ int MPII_Comm_is_node_balanced(MPIR_Comm * comm, int *num_nodes, bool * node_bal
fn_fail:
goto fn_exit;
}

/* Record `request` in the persistent-request hash of `comm`.
 *
 * The entry is keyed by the request's integer `handle` field. The insertion
 * is performed between MPID_THREAD_CS_ENTER/EXIT(VCI, comm->mutex) so that
 * concurrent updates of the same communicator's hash do not race.
 *
 * Always returns MPI_SUCCESS. */
int MPIR_Comm_save_inactive_request(MPIR_Comm * comm, MPIR_Request * request)
{
    int mpi_errno = MPI_SUCCESS;

    MPID_THREAD_CS_ENTER(VCI, comm->mutex);
    HASH_ADD_INT(comm->persistent_requests, handle, request, MPL_MEM_COMM);
    MPID_THREAD_CS_EXIT(VCI, comm->mutex);

    return mpi_errno;
}

/* Drop `request` from the persistent-request hash of `comm`.
 *
 * The removal is performed between MPID_THREAD_CS_ENTER/EXIT(VCI,
 * comm->mutex), mirroring MPIR_Comm_save_inactive_request. The request
 * object itself is not freed here.
 *
 * Always returns MPI_SUCCESS. */
int MPIR_Comm_delete_inactive_request(MPIR_Comm * comm, MPIR_Request * request)
{
    int mpi_errno = MPI_SUCCESS;

    MPID_THREAD_CS_ENTER(VCI, comm->mutex);
    HASH_DEL(comm->persistent_requests, request);
    MPID_THREAD_CS_EXIT(VCI, comm->mutex);

    return mpi_errno;
}

/* Free every inactive persistent request still recorded on `comm`.
 *
 * Walks comm->persistent_requests between MPID_THREAD_CS_ENTER/EXIT(VCI,
 * comm->mutex). For each entry for which MPIR_Request_is_active() is false,
 * a warning naming the request and communicator handles (in hex) is printed
 * to stdout and the request is released with MPIR_Request_free_impl().
 * Active requests are left untouched.
 *
 * Always returns MPI_SUCCESS. */
int MPIR_Comm_free_inactive_requests(MPIR_Comm * comm)
{
    MPIR_Request *request, *tmp;

    MPID_THREAD_CS_ENTER(VCI, comm->mutex);
    /* HASH_ITER caches the successor in `tmp`, so the current entry may be
     * released inside the loop body without breaking the traversal. */
    HASH_ITER(hh, comm->persistent_requests, request, tmp) {
        if (!MPIR_Request_is_active(request)) {
            /* %x expects an unsigned int; the handles are keyed as plain
             * ints (see HASH_ADD_INT), so convert explicitly to keep the
             * format string and its arguments in agreement. */
            printf("WARNING: freeing inactive persistent request %x on communicator %x.\n",
                   (unsigned int) request->handle, (unsigned int) comm->handle);
            /* NOTE(review): nothing in this function removes the request from
             * comm->persistent_requests, and the free runs while comm->mutex
             * is held. Confirm whether MPIR_Request_free_impl unlinks the
             * entry itself (and, if so, that it does not re-enter this
             * mutex); otherwise the hash is left with a dangling entry. */
            MPIR_Request_free_impl(request);
        }
    }
    MPID_THREAD_CS_EXIT(VCI, comm->mutex);

    return MPI_SUCCESS;
}

0 comments on commit d670af6

Please sign in to comment.