Skip to content

Commit

Permalink
Merge pull request #2590 from jjhursey/topic/osc-pt2pt-1-thread-fixes
Browse files Browse the repository at this point in the history
Topic/osc pt2pt 1 thread fixes
  • Loading branch information
jjhursey authored Dec 16, 2016
2 parents d8c1a3d + eec1d5b commit ced245d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 4 deletions.
8 changes: 7 additions & 1 deletion ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010-2016 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -227,6 +227,12 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
/* haven't processed any post messages yet */
sync->sync_expected = sync->num_peers;

/* If the previous epoch was from Fence, then eager_send_active is still
* set to true at this time, but it shoulnd't be true until we get our
* incoming Posts. So reset to 'false' for this new epoch.
*/
sync->eager_send_active = false;

OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_pt2pt_start entering with group size %d...",
sync->num_peers));
Expand Down
23 changes: 21 additions & 2 deletions ompi/mca/osc/pt2pt/osc_pt2pt_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 FUJITSU LIMITED. All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -336,7 +337,16 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_

if (is_long_msg) {
/* wait for eager sends to be active before starting a long put */
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) {
OPAL_THREAD_LOCK(&pt2pt_sync->lock);
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) {
opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock);
}
OPAL_THREAD_UNLOCK(&pt2pt_sync->lock);
} else {
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
}
}

OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
Expand Down Expand Up @@ -495,7 +505,16 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,

if (is_long_msg) {
/* wait for synchronization before posting a long message */
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) {
OPAL_THREAD_LOCK(&pt2pt_sync->lock);
ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) {
opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock);
}
OPAL_THREAD_UNLOCK(&pt2pt_sync->lock);
} else {
ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
}
}

header = (ompi_osc_pt2pt_header_acc_t*) ptr;
Expand Down
12 changes: 11 additions & 1 deletion ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* All rights reserved.
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010-2016 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
Expand Down Expand Up @@ -421,6 +421,16 @@ static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win)
/* wait for unlock acks. this signals remote completion of fragments */
ompi_osc_pt2pt_sync_wait_expected (lock);

/* It is possible for the unlock to finish too early before the data
* is actually present in the recv buffer (for non-contiguous datatypes)
* So make sure to wait for all of the fragments to arrive.
*/
OPAL_THREAD_LOCK(&module->lock);
while (module->outgoing_frag_count < module->outgoing_frag_signal_count) {
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_THREAD_UNLOCK(&module->lock);

OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"ompi_osc_pt2pt_unlock: unlock of %d complete", target));
} else {
Expand Down

0 comments on commit ced245d

Please sign in to comment.