From 9dc9e8519a7b685f5d920337f68b6b51dda1df88 Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Fri, 3 Apr 2020 12:40:30 +0300 Subject: [PATCH 1/2] UCT/IB: Hold rcache refcnt for UMR contig memh --- src/ucp/core/ucp_request.c | 3 ++- src/uct/api/uct.h | 1 + src/uct/ib/base/ib_md.c | 20 ++++++-------------- src/uct/ib/mlx5/exp/ib_exp_md.c | 6 ++++++ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ucp/core/ucp_request.c b/src/ucp/core/ucp_request.c index 68e31997e5c..a83a24e5691 100644 --- a/src/ucp/core/ucp_request.c +++ b/src/ucp/core/ucp_request.c @@ -277,7 +277,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_request_memory_reg, status = ucp_mem_rereg_mds(context, UCS_BIT(md_idx), buffer + s->lb_displ, s->extent, - UCT_MD_MEM_ACCESS_ALL, NULL, + UCT_MD_MEM_ACCESS_ALL | + UCT_MD_MEM_FLAG_NC_BASE, NULL, UCS_MEMORY_TYPE_HOST, NULL, state->dt.struct_dt.contig.memh, &state->dt.struct_dt.contig.md_map); diff --git a/src/uct/api/uct.h b/src/uct/api/uct.h index 8305707cf6e..ec482fee995 100644 --- a/src/uct/api/uct.h +++ b/src/uct/api/uct.h @@ -664,6 +664,7 @@ enum uct_md_mem_flags { UCT_MD_MEM_ACCESS_REMOTE_GET = UCS_BIT(6), /**< enable remote get access */ UCT_MD_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(7), /**< enable remote atomic access */ UCT_MD_MEM_FLAG_EMPTY = UCS_BIT(8), /**< Create empty handle (for UMR) */ + UCT_MD_MEM_FLAG_NC_BASE = UCS_BIT(9), /**< Used by UMR */ /** enable local and remote access for all operations */ UCT_MD_MEM_ACCESS_ALL = (UCT_MD_MEM_ACCESS_REMOTE_PUT| diff --git a/src/uct/ib/base/ib_md.c b/src/uct/ib/base/ib_md.c index 8c1318d45bb..e1092f5a74f 100644 --- a/src/uct/ib/base/ib_md.c +++ b/src/uct/ib/base/ib_md.c @@ -896,20 +896,6 @@ static ucs_status_t uct_ib_mem_rcache_reg(uct_md_h uct_md, void *address, ucs_status_t status; uct_ib_mem_t *memh; -char *ptr = getenv("PMIX_RANK"); -if(!strcmp(ptr, "0")){ - static int count = 0; - - printf("ALLOC: addr=%p, size=%zu, count=%d\n", address, length, count++); - fflush(stdout); -#if 1 - static int delay = 1; - while( ((count - 1) == 16) && delay ) { - sleep(1); - } -#endif -} - status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE, &flags, &rregion); if (status != UCS_OK) { @@ -925,6 +911,12 @@ if(!strcmp(ptr, "0")){ if (flags & UCT_MD_MEM_ACCESS_REMOTE_ATOMIC) { memh->flags |= UCT_IB_MEM_ACCESS_REMOTE_ATOMIC; } + + if (flags & UCT_MD_MEM_FLAG_NC_BASE) { + /* This region is used by UMR */ + ucs_rcache_region_hold(md->rcache, rregion); + } + *memh_p = memh; return UCS_OK; } diff --git a/src/uct/ib/mlx5/exp/ib_exp_md.c b/src/uct/ib/mlx5/exp/ib_exp_md.c index 9504789301b..8dc41441d2c 100644 --- a/src/uct/ib/mlx5/exp/ib_exp_md.c +++ b/src/uct/ib/mlx5/exp/ib_exp_md.c @@ -31,9 +31,11 @@ typedef struct uct_ib_mlx5_mem { } uct_ib_mlx5_mem_t; struct uct_ib_umr { + uct_ib_mlx5_md_t *md; unsigned depth; int is_inline; uct_ib_mlx5_mem_t memh; /* memh for indirect mr*/ + uct_ib_mlx5_mem_t *contig_memh; struct ibv_exp_send_wr wr; size_t repeat_count; /* 0 is not allowed; if 1 it is UMR list, otherwise repeated block */ @@ -381,11 +383,13 @@ uct_ib_mlx5_exp_umr_alloc(uct_ib_mlx5_md_t *md, const uct_iov_t *iov, } memset(&umr->wr, 0, sizeof(umr->wr)); + umr->md = md; umr->repeat_count = repeat_count; umr->depth = umr_depth; umr->iov_count = iov_count; umr->comp.count = 1; /* for async reg */ umr->memh.umr = umr; + umr->contig_memh = ucs_derived_of(iov->memh, uct_ib_mlx5_mem_t); /* assume all iovs use the same memh for now */ if (repeat_count == 1) { /* MRs list */ status = uct_ib_mlx5_exp_umr_fill_region(umr, iov, iov_count); @@ -552,6 +556,8 @@ uct_ib_mlx5_exp_umr_deregister(uct_ib_mem_t *memh, struct ibv_qp *qp, } } + umr->md->super.super.ops->mem_dereg(&umr->md->super.super, &umr->contig_memh->super); + ucs_free(umr); return UCS_OK; From c060ccdeab783b56c783eb34c8d5467c10395783 Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Fri, 3 Apr 2020 12:45:38 +0300 Subject: [PATCH 2/2] UCP/DT: Add stats for struct dt --- src/ucp/core/ucp_request.c | 2 ++ src/ucp/dt/dt_struct.c | 24 ++++++++++++++++++++++++ src/ucp/dt/dt_struct.h | 8 ++++++++ 3 files changed, 34 insertions(+) diff --git a/src/ucp/core/ucp_request.c b/src/ucp/core/ucp_request.c index a83a24e5691..a428ed69ddc 100644 --- a/src/ucp/core/ucp_request.c +++ b/src/ucp/core/ucp_request.c @@ -268,6 +268,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_request_memory_reg, s, buffer, nc_memh); /* SET memh properly */ state->dt.struct_dt.non_contig.memh[0] = nc_memh; + UCS_STATS_UPDATE_COUNTER(s->stats, UCP_DT_STRUCT_STAT_IN_CACHE, 1); return UCS_OK; } @@ -304,6 +305,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_request_memory_reg, if (status != UCS_OK) { goto err; } + UCS_STATS_UPDATE_COUNTER(s->stats, UCP_DT_STRUCT_STAT_CREATE, 1); break; default: diff --git a/src/ucp/dt/dt_struct.c b/src/ucp/dt/dt_struct.c index bd55794c4ad..26113b4c2c5 100644 --- a/src/ucp/dt/dt_struct.c +++ b/src/ucp/dt/dt_struct.c @@ -22,6 +22,17 @@ #include #include +#if ENABLE_STATS +static ucs_stats_class_t ucp_dt_struct_stats_class = { + .name = "dt_struct", + .num_counters = UCP_DT_STRUCT_STAT_LAST, + .counter_names = { + [UCP_DT_STRUCT_STAT_CREATE] = "create", + [UCP_DT_STRUCT_STAT_IN_CACHE] = "reuse" + } +}; +#endif + ucs_status_t _struct_register_ep_rec(uct_ep_h ep, void *buf, ucp_dt_struct_t *s, uct_mem_h contig_memh, uct_mem_h* memh); @@ -222,6 +233,7 @@ ucs_status_t ucp_dt_create_struct(ucp_struct_dt_desc_t *desc_ptr, size_t desc_count, size_t rep_count, ucp_datatype_t *datatype_p) { + ucs_status_t status; ucp_dt_struct_t *dt; size_t i; @@ -269,6 +281,17 @@ ucs_status_t ucp_dt_create_struct(ucp_struct_dt_desc_t *desc_ptr, _set_struct_attributes(dt); *datatype_p = ((uintptr_t)dt) | UCP_DATATYPE_STRUCT; + + + status = UCS_STATS_NODE_ALLOC(&dt->stats, + &ucp_dt_struct_stats_class, + ucs_stats_get_root(), "%p-%d-%d", + dt, desc_count, rep_count); + if (status != UCS_OK) { + ucs_error("Can't allocate stats: %s", ucs_status_string(status)); + return status; + } + ucs_info("Created struct dt %p, len %ld (step %ld), depth %ld, uct_iovs %ld, rep count %ld", dt, dt->len, dt->step_len, dt->depth, dt->uct_iov_count, dt->rep_count); @@ -290,6 +313,7 @@ void ucp_dt_destroy_struct(ucp_datatype_t datatype_p) }) kh_destroy_inplace(dt_struct, &dt->hash); ucs_free(dt->desc); + UCS_STATS_NODE_FREE(dt->stats); ucs_free(dt); } diff --git a/src/ucp/dt/dt_struct.h b/src/ucp/dt/dt_struct.h index ee925b7c6f5..cc9ed593502 100644 --- a/src/ucp/dt/dt_struct.h +++ b/src/ucp/dt/dt_struct.h @@ -11,6 +11,7 @@ #include #include #include +#include typedef struct ucp_dt_struct_hash_value { uct_md_h md; @@ -36,6 +37,12 @@ int main() { } */ +enum { + UCP_DT_STRUCT_STAT_CREATE, + UCP_DT_STRUCT_STAT_IN_CACHE, + UCP_DT_STRUCT_STAT_LAST +}; + /** * Structured datatype structure. */ @@ -48,6 +55,7 @@ typedef struct ucp_dt_struct { size_t extent; /* total contig space covering the whole type */ ptrdiff_t lb_displ; /* the lowest displacement from which extent is effective */ khash_t(dt_struct) hash; + UCS_STATS_NODE_DECLARE(stats); } ucp_dt_struct_t; static inline ucp_dt_struct_t* ucp_dt_struct(ucp_datatype_t datatype)