Skip to content

Commit

Permalink
UCT/IB: Hold rcache refcnt for UMR contig memh
Browse files (browse the repository at this point in the history)
  • Loading branch information
brminich committed Apr 3, 2020
1 parent d0de5ef commit 9dc9e85
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 15 deletions.
3 changes: 2 additions & 1 deletion src/ucp/core/ucp_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_request_memory_reg,
status = ucp_mem_rereg_mds(context, UCS_BIT(md_idx),
buffer + s->lb_displ,
s->extent,
UCT_MD_MEM_ACCESS_ALL, NULL,
UCT_MD_MEM_ACCESS_ALL |
UCT_MD_MEM_FLAG_NC_BASE, NULL,
UCS_MEMORY_TYPE_HOST, NULL,
state->dt.struct_dt.contig.memh,
&state->dt.struct_dt.contig.md_map);
Expand Down
1 change: 1 addition & 0 deletions src/uct/api/uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ enum uct_md_mem_flags {
UCT_MD_MEM_ACCESS_REMOTE_GET = UCS_BIT(6), /**< enable remote get access */
UCT_MD_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(7), /**< enable remote atomic access */
UCT_MD_MEM_FLAG_EMPTY = UCS_BIT(8), /**< Create empty handle (for UMR) */
UCT_MD_MEM_FLAG_NC_BASE = UCS_BIT(9), /**< Used by UMR */

/** enable local and remote access for all operations */
UCT_MD_MEM_ACCESS_ALL = (UCT_MD_MEM_ACCESS_REMOTE_PUT|
Expand Down
20 changes: 6 additions & 14 deletions src/uct/ib/base/ib_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -896,20 +896,6 @@ static ucs_status_t uct_ib_mem_rcache_reg(uct_md_h uct_md, void *address,
ucs_status_t status;
uct_ib_mem_t *memh;

char *ptr = getenv("PMIX_RANK");
if(!strcmp(ptr, "0")){
static int count = 0;

printf("ALLOC: addr=%p, size=%zu, count=%d\n", address, length, count++);
fflush(stdout);
#if 1
static int delay = 1;
while( ((count - 1) == 16) && delay ) {
sleep(1);
}
#endif
}

status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE,
&flags, &rregion);
if (status != UCS_OK) {
Expand All @@ -925,6 +911,12 @@ if(!strcmp(ptr, "0")){
if (flags & UCT_MD_MEM_ACCESS_REMOTE_ATOMIC) {
memh->flags |= UCT_IB_MEM_ACCESS_REMOTE_ATOMIC;
}

if (flags & UCT_MD_MEM_FLAG_NC_BASE) {
/* This region is used by UMR */
ucs_rcache_region_hold(md->rcache, rregion);
}

*memh_p = memh;
return UCS_OK;
}
Expand Down
6 changes: 6 additions & 0 deletions src/uct/ib/mlx5/exp/ib_exp_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ typedef struct uct_ib_mlx5_mem {
} uct_ib_mlx5_mem_t;

struct uct_ib_umr {
uct_ib_mlx5_md_t *md;
unsigned depth;
int is_inline;
uct_ib_mlx5_mem_t memh; /* memh for indirect mr*/
uct_ib_mlx5_mem_t *contig_memh;
struct ibv_exp_send_wr wr;
size_t repeat_count; /* 0 is not allowed; if 1 it is UMR
list, otherwise repeated block */
Expand Down Expand Up @@ -381,11 +383,13 @@ uct_ib_mlx5_exp_umr_alloc(uct_ib_mlx5_md_t *md, const uct_iov_t *iov,
}
memset(&umr->wr, 0, sizeof(umr->wr));

umr->md = md;
umr->repeat_count = repeat_count;
umr->depth = umr_depth;
umr->iov_count = iov_count;
umr->comp.count = 1; /* for async reg */
umr->memh.umr = umr;
umr->contig_memh = ucs_derived_of(iov->memh, uct_ib_mlx5_mem_t); /* assume all iovs use the same memh for now */

if (repeat_count == 1) { /* MRs list */
status = uct_ib_mlx5_exp_umr_fill_region(umr, iov, iov_count);
Expand Down Expand Up @@ -552,6 +556,8 @@ uct_ib_mlx5_exp_umr_deregister(uct_ib_mem_t *memh, struct ibv_qp *qp,
}
}

umr->md->super.super.ops->mem_dereg(&umr->md->super.super, &umr->contig_memh->super);

ucs_free(umr);

return UCS_OK;
Expand Down

0 comments on commit 9dc9e85

Please sign in to comment.