From eed2f530ad81ad815d491a25797ec20e59445983 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 27 Aug 2021 09:49:50 -0400 Subject: [PATCH] Make sure opal_thread_start always spawns pthreads Users of `opal_thread_start` (btl/tcp, ft, smcuda, progress thread) may spawn threads that may block in functions unaware of argobots or qthreads (e.g., libevent or read(3)). If we spawn an argobot or qthread instead of a pthread, the thread executing the argobot or qthread (potentially the main thread) blocks, leading to a deadlock situation. Open MPI expects the semantics of a pthread, so we should handle all internal threads as such. Signed-off-by: Joseph Schuchart (cherry picked from commit e3ca132cc222746a14618032a9e37c139b77a38f) --- .../argobots/threads_argobots_module.c | 74 +---------------- .../argobots/threads_argobots_threads.h | 11 +-- opal/mca/threads/base/Makefile.am | 1 + opal/mca/threads/base/create_join.c | 83 +++++++++++++++++++ .../pthreads/threads_pthreads_module.c | 51 +----------- .../pthreads/threads_pthreads_threads.h | 9 +- .../qthreads/threads_qthreads_module.c | 70 +--------------- .../qthreads/threads_qthreads_threads.h | 12 +-- opal/mca/threads/threads.h | 9 +- 9 files changed, 98 insertions(+), 222 deletions(-) create mode 100644 opal/mca/threads/base/create_join.c diff --git a/opal/mca/threads/argobots/threads_argobots_module.c b/opal/mca/threads/argobots/threads_argobots_module.c index 708a7da645a..50acb60d8ce 100644 --- a/opal/mca/threads/argobots/threads_argobots_module.c +++ b/opal/mca/threads/argobots/threads_argobots_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -33,78 +33,6 @@ #include "opal/util/output.h" #include "opal/util/sys_limits.h" -/* - * Constructor - */ -static void opal_thread_construct(opal_thread_t *t) -{ - t->t_run = 0; - t->t_handle = ABT_THREAD_NULL; -} - -OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); - -static inline ABT_thread opal_thread_get_argobots_self(void) -{ - ABT_thread self; - ABT_thread_self(&self); - return self; -} - -static void opal_thread_argobots_wrapper(void *arg) -{ - opal_thread_t *t = (opal_thread_t *) arg; - t->t_ret = ((void *(*) (void *) ) t->t_run)(t); -} - -opal_thread_t *opal_thread_get_self(void) -{ - opal_threads_argobots_ensure_init(); - opal_thread_t *t = OBJ_NEW(opal_thread_t); - t->t_handle = opal_thread_get_argobots_self(); - return t; -} - -bool opal_thread_self_compare(opal_thread_t *t) -{ - opal_threads_argobots_ensure_init(); - return opal_thread_get_argobots_self() == t->t_handle; -} - -int opal_thread_join(opal_thread_t *t, void **thr_return) -{ - int rc = ABT_thread_free(&t->t_handle); - if (thr_return) { - *thr_return = t->t_ret; - } - t->t_handle = ABT_THREAD_NULL; - return (ABT_SUCCESS == rc) ? OPAL_SUCCESS : OPAL_ERROR; -} - -void opal_thread_set_main() -{ -} - -int opal_thread_start(opal_thread_t *t) -{ - opal_threads_argobots_ensure_init(); - int rc; - if (OPAL_ENABLE_DEBUG) { - if (NULL == t->t_run || ABT_THREAD_NULL != t->t_handle) { - return OPAL_ERR_BAD_PARAM; - } - } - - ABT_xstream self_xstream; - ABT_xstream_self(&self_xstream); - rc = ABT_thread_create_on_xstream(self_xstream, opal_thread_argobots_wrapper, t, - ABT_THREAD_ATTR_NULL, &t->t_handle); - - return (ABT_SUCCESS == rc) ? 
OPAL_SUCCESS : OPAL_ERROR; -} - -OBJ_CLASS_DECLARATION(opal_thread_t); - int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor) { opal_threads_argobots_ensure_init(); diff --git a/opal/mca/threads/argobots/threads_argobots_threads.h b/opal/mca/threads/argobots/threads_argobots_threads.h index 649553adcbd..417ad3d5884 100644 --- a/opal/mca/threads/argobots/threads_argobots_threads.h +++ b/opal/mca/threads/argobots/threads_argobots_threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, @@ -27,15 +27,6 @@ #define OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H #include "opal/mca/threads/argobots/threads_argobots.h" -#include - -struct opal_thread_t { - opal_object_t super; - opal_thread_fn_t t_run; - void *t_arg; - ABT_thread t_handle; - void *t_ret; -}; /* Argobots are cooperatively scheduled so yield when idle */ #define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true diff --git a/opal/mca/threads/base/Makefile.am b/opal/mca/threads/base/Makefile.am index fcf7c0b3a67..691a9b7f747 100644 --- a/opal/mca/threads/base/Makefile.am +++ b/opal/mca/threads/base/Makefile.am @@ -22,5 +22,6 @@ headers += \ base/base.h libmca_threads_la_SOURCES += \ + base/create_join.c \ base/threads_base.c \ base/tsd.c diff --git a/opal/mca/threads/base/create_join.c b/opal/mca/threads/base/create_join.c new file mode 100644 index 00000000000..e134f085930 --- /dev/null +++ b/opal/mca/threads/base/create_join.c @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University 
Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2021 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include "opal/constants.h" +#include "opal/mca/threads/threads.h" +#include "opal/mca/threads/tsd.h" +#include "opal/prefetch.h" +#include "opal/util/output.h" +#include "opal/util/sys_limits.h" + +/* + * Constructor + */ +static void opal_thread_construct(opal_thread_t *t) +{ + t->t_run = 0; + t->t_handle = (pthread_t) -1; +} + +OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); + +int opal_thread_start(opal_thread_t *t) +{ + int rc; + + if (OPAL_ENABLE_DEBUG) { + if (NULL == t->t_run || (pthread_t) -1 != t->t_handle) { + return OPAL_ERR_BAD_PARAM; + } + } + + rc = pthread_create(&t->t_handle, NULL, (void *(*) (void *) ) t->t_run, t); + + return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; +} + +int opal_thread_join(opal_thread_t *t, void **thr_return) +{ + int rc = pthread_join(t->t_handle, thr_return); + t->t_handle = (pthread_t) -1; + return 0 == rc ? 
OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; +} + +bool opal_thread_self_compare(opal_thread_t *t) +{ + return pthread_self() == t->t_handle; +} + +opal_thread_t *opal_thread_get_self(void) +{ + opal_thread_t *t = OBJ_NEW(opal_thread_t); + t->t_handle = pthread_self(); + return t; +} + +void opal_thread_set_main(void) +{ +} diff --git a/opal/mca/threads/pthreads/threads_pthreads_module.c b/opal/mca/threads/pthreads/threads_pthreads_module.c index 70dec2964b6..ac09b71d53d 100644 --- a/opal/mca/threads/pthreads/threads_pthreads_module.c +++ b/opal/mca/threads/pthreads/threads_pthreads_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -32,58 +32,9 @@ #include "opal/util/output.h" #include "opal/util/sys_limits.h" -/* - * Constructor - */ -static void opal_thread_construct(opal_thread_t *t) -{ - t->t_run = 0; - t->t_handle = (pthread_t) -1; -} - -OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); - -int opal_thread_start(opal_thread_t *t) -{ - int rc; - - if (OPAL_ENABLE_DEBUG) { - if (NULL == t->t_run || (pthread_t) -1 != t->t_handle) { - return OPAL_ERR_BAD_PARAM; - } - } - - rc = pthread_create(&t->t_handle, NULL, (void *(*) (void *) ) t->t_run, t); - - return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; -} - -int opal_thread_join(opal_thread_t *t, void **thr_return) -{ - int rc = pthread_join(t->t_handle, thr_return); - t->t_handle = (pthread_t) -1; - return 0 == rc ? 
OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; -} - -bool opal_thread_self_compare(opal_thread_t *t) -{ - return pthread_self() == t->t_handle; -} - -opal_thread_t *opal_thread_get_self(void) -{ - opal_thread_t *t = OBJ_NEW(opal_thread_t); - t->t_handle = pthread_self(); - return t; -} - int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor) { int rc; rc = pthread_key_create(key, destructor); return 0 == rc ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; } - -void opal_thread_set_main(void) -{ -} diff --git a/opal/mca/threads/pthreads/threads_pthreads_threads.h b/opal/mca/threads/pthreads/threads_pthreads_threads.h index 3cd7a3dae14..4bdb3710715 100644 --- a/opal/mca/threads/pthreads/threads_pthreads_threads.h +++ b/opal/mca/threads/pthreads/threads_pthreads_threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, @@ -32,13 +32,6 @@ #include "opal/mca/threads/pthreads/threads_pthreads.h" #include "opal/mca/threads/threads.h" -struct opal_thread_t { - opal_object_t super; - opal_thread_fn_t t_run; - void *t_arg; - pthread_t t_handle; -}; - /* Pthreads do not need to yield when idle */ #define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false diff --git a/opal/mca/threads/qthreads/threads_qthreads_module.c b/opal/mca/threads/qthreads/threads_qthreads_module.c index a5dc24674a2..7dca13f5f4f 100644 --- a/opal/mca/threads/qthreads/threads_qthreads_module.c +++ b/opal/mca/threads/qthreads/threads_qthreads_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -51,74 +51,6 @@ static inline void self_key_ensure_init(void) /* opal_thread_self_key has been already initialized. */ } -/* - * Constructor - */ -static void opal_thread_construct(opal_thread_t *t) -{ - t->t_run = 0; - t->t_thread_ret = 0; -} - -OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL); - -static inline aligned_t *opal_thread_get_qthreads_self(void) -{ - self_key_ensure_init(); - void *ptr = qthread_getspecific(opal_thread_self_key); - return (aligned_t *) ptr; -} - -static aligned_t opal_thread_qthreads_wrapper(void *arg) -{ - opal_thread_t *t = (opal_thread_t *) arg; - - /* Register itself. */ - self_key_ensure_init(); - qthread_setspecific(opal_thread_self_key, t->t_thread_ret_ptr); - - t->t_ret = ((void *(*) (void *) ) t->t_run)(t); - return 0; -} - -opal_thread_t *opal_thread_get_self(void) -{ - opal_threads_ensure_init_qthreads(); - opal_thread_t *t = OBJ_NEW(opal_thread_t); - t->t_thread_ret_ptr = opal_thread_get_qthreads_self(); - return t; -} - -bool opal_thread_self_compare(opal_thread_t *t) -{ - opal_threads_ensure_init_qthreads(); - return opal_thread_get_qthreads_self() == &t->t_thread_ret; -} - -int opal_thread_join(opal_thread_t *t, void **thr_return) -{ - qthread_readFF(NULL, t->t_thread_ret_ptr); - if (thr_return) { - *thr_return = t->t_ret; - } - t->t_thread_ret = 0; - return OPAL_SUCCESS; -} - -void opal_thread_set_main(void) -{ -} - -int opal_thread_start(opal_thread_t *t) -{ - opal_threads_ensure_init_qthreads(); - t->t_thread_ret_ptr = &t->t_thread_ret; - qthread_fork(opal_thread_qthreads_wrapper, t, &t->t_thread_ret); - return OPAL_SUCCESS; -} - -OBJ_CLASS_DECLARATION(opal_thread_t); - int opal_tsd_key_create(opal_tsd_key_t 
*key, opal_tsd_destructor_t destructor) { opal_threads_ensure_init_qthreads(); diff --git a/opal/mca/threads/qthreads/threads_qthreads_threads.h b/opal/mca/threads/qthreads/threads_qthreads_threads.h index 1969c558c3d..58630f4023a 100644 --- a/opal/mca/threads/qthreads/threads_qthreads_threads.h +++ b/opal/mca/threads/qthreads/threads_qthreads_threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2020 High Performance Computing Center Stuttgart, @@ -27,16 +27,6 @@ #define OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H 1 #include "opal/mca/threads/qthreads/threads_qthreads.h" -#include - -struct opal_thread_t { - opal_object_t super; - opal_thread_fn_t t_run; - void *t_arg; - void *t_ret; - aligned_t t_thread_ret; - aligned_t *t_thread_ret_ptr; -}; /* Qthreads are cooperatively scheduled so yield when idle */ #define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true diff --git a/opal/mca/threads/threads.h b/opal/mca/threads/threads.h index 7e168380666..0f78830cdad 100644 --- a/opal/mca/threads/threads.h +++ b/opal/mca/threads/threads.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -45,6 +45,13 @@ typedef void *(*opal_thread_fn_t)(opal_object_t *); #include MCA_threads_base_include_HEADER +struct opal_thread_t { + opal_object_t super; + opal_thread_fn_t t_run; + void *t_arg; + pthread_t t_handle; +}; + typedef struct opal_thread_t opal_thread_t; OBJ_CLASS_DECLARATION(opal_thread_t);