Skip to content

Commit

Permalink
PMIx_Connect usage: add optional timeout
Browse files Browse the repository at this point in the history
Add an MCA parameter that can be used to set a timeout on the PMIx_Connect
operation used to support MPI_Comm_accept/connect and relatives.

Related to open-mpi#8958

Signed-off-by: Howard Pritchard <[email protected]>
  • Loading branch information
hppritcha committed May 13, 2021
1 parent 0e76855 commit 038291a
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
9 changes: 7 additions & 2 deletions ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -104,7 +106,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
bool dense, isnew;
opal_process_name_t pname;
opal_list_t ilist, mlist, rlist;
pmix_info_t info;
pmix_info_t info, tinfo;
pmix_value_t pval;
pmix_pdata_t pdat;
pmix_proc_t *procs, pxproc;
Expand Down Expand Up @@ -373,7 +375,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
/* tell the host RTE to connect us - this will download
* all known data for the nspace's of participating procs
* so that add_procs will not result in a slew of lookups */
pret = PMIx_Connect(procs, nprocs, NULL, 0);
PMIX_INFO_CONSTRUCT(&tinfo);
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
pret = PMIx_Connect(procs, nprocs, &tinfo, 1);
PMIX_INFO_DESTRUCT(&tinfo);
PMIX_PROC_FREE(procs, nprocs);
rc = opal_pmix_convert_status(pret);
if (OPAL_SUCCESS != rc) {
Expand Down
10 changes: 10 additions & 0 deletions ompi/runtime/ompi_mpi_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
* All rights reserved.
* Copyright (c) 2016-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -83,6 +85,7 @@ bool ompi_mpi_compat_mpi3 = false;

char *ompi_mpi_spc_attach_string = NULL;
bool ompi_mpi_spc_dump_enabled = false;
/* Timeout (secs) passed to PMIx_Connect; initialized to 0 (no timeout)
 * when the MCA parameter is registered in ompi_mpi_register_params(). */
uint32_t ompi_pmix_connect_timeout;

static bool show_default_mca_params = false;
static bool show_file_mca_params = false;
Expand Down Expand Up @@ -391,6 +394,13 @@ int ompi_mpi_register_params(void)
&ompi_mpi_spc_dump_enabled);
#endif // SPC_ENABLE

ompi_pmix_connect_timeout = 0; /* infinite timeout - see PMIx standard */
(void) mca_base_var_register ("ompi", "mpi", NULL, "pmix_connect_timeout",
"Timeout(secs) for calls to PMIx_Connect. Default is no timeout.",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_pmix_connect_timeout);

return OMPI_SUCCESS;
}

Expand Down
6 changes: 6 additions & 0 deletions ompi/runtime/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -172,6 +174,10 @@ OMPI_DECLSPEC extern char * ompi_mpi_spc_attach_string;
*/
OMPI_DECLSPEC extern bool ompi_mpi_spc_dump_enabled;

/**
* Timeout in seconds for calls to PMIx_Connect (default 0, no timeout)
*/
OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout;

/**
* Register MCA parameters used by the MPI layer.
Expand Down

0 comments on commit 038291a

Please sign in to comment.