From ceb1eed37cff82d4a6226f6c1e7f34e3157e0f25 Mon Sep 17 00:00:00 2001 From: yosefe Date: Mon, 10 Nov 2014 10:03:02 +0200 Subject: [PATCH 1/4] JENKINS: don't modify JUB_URL --- contrib/test_jenkins.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/test_jenkins.sh b/contrib/test_jenkins.sh index 488958cea4d..1166c0c33d9 100755 --- a/contrib/test_jenkins.sh +++ b/contrib/test_jenkins.sh @@ -6,10 +6,10 @@ if [ -z "$BUILD_NUMBER" ]; then echo Running interactive WORKSPACE=$PWD BUILD_NUMBER=1 - JOB_URL=file://$WORKSPACE + WS_URL=file://$WORKSPACE else echo Running under jenkins - JOB_URL=$JOB_URL/ws + WS_URL=$JOB_URL/ws fi rpm_topdir=$WORKSPACE/rpm-dist @@ -57,7 +57,7 @@ nerrors=$(cov-analyze --dir $cov_build |grep "Defect occurrences found" | awk '{ cov-format-errors --dir $cov_build rc=$(($rc+$nerrors)) -cov_url="$JOB_URL/$cov_build_id/c/output/errors/index.html" +cov_url="$WS_URL/$cov_build_id/c/output/errors/index.html" rm -f jenkins_sidelinks.txt echo 1..1 > coverity.tap if [ $nerrors -gt 0 ]; then From 7c908f8a9e9f8589d44e46dff57ae002ef2f3726 Mon Sep 17 00:00:00 2001 From: yosefe Date: Fri, 14 Nov 2014 14:54:42 +0200 Subject: [PATCH 2/4] UCS: Fix typo --- src/ucs/stats/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ucs/stats/stats.c b/src/ucs/stats/stats.c index d2da176426f..03f621c5d69 100644 --- a/src/ucs/stats/stats.c +++ b/src/ucs/stats/stats.c @@ -447,19 +447,19 @@ int ucs_stats_is_active() #else -void mxm_stats_init() +void ucs_stats_init() { } -void mxm_stats_cleanup() +void ucs_stats_cleanup() { } -void mxm_stats_dump() +void ucs_stats_dump() { } -int mxm_stats_is_active() +int ucs_stats_is_active() { return 0; } From 199a51af81f1b10b950ef09a84b9dbd45781ca06 Mon Sep 17 00:00:00 2001 From: yosefe Date: Sat, 15 Nov 2014 15:43:45 +0200 Subject: [PATCH 3/4] IB/TEST: Implement put_short() for message rate. + Poll for RC completions + Check if there is room in th RC QP before posting + Implement iface_flush() + Add message rate performance tests + Implement test warmup cycles + rename "hw_name" to "dev_name" --- src/uct/Makefile.am | 1 + src/uct/api/tl.h | 17 +++- src/uct/api/uct.h | 18 +++- src/uct/ib/base/ib_context.c | 2 +- src/uct/ib/base/ib_device.c | 17 ++-- src/uct/ib/base/ib_device.h | 6 +- src/uct/ib/base/ib_iface.c | 20 ++-- src/uct/ib/base/ib_iface.h | 2 +- src/uct/ib/base/ib_verbs.h | 16 ++++ src/uct/ib/mlx5/ib_mlx5.c | 3 + src/uct/ib/mlx5/ib_mlx5.h | 10 ++ src/uct/ib/rc/rc_def.h | 20 ++++ src/uct/ib/rc/rc_ep.c | 7 +- src/uct/ib/rc/rc_ep.h | 12 ++- src/uct/ib/rc/rc_iface.c | 75 ++++++--------- src/uct/ib/rc/rc_iface.h | 19 ++-- src/uct/ib/rc/rc_mlx5.c | 145 ++++++++++++++++++++++++----- src/uct/ib/rc/rc_mlx5.h | 13 ++- src/uct/tl/context.c | 9 +- test/gtest/Makefile.am | 12 +-- test/gtest/uct/test_uct_context.cc | 9 +- test/gtest/uct/test_uct_perf.cc | 84 +++++++++++------ test/perf/Makefile.am | 9 +- test/perf/libperf.c | 137 ++++++++++++++++++++++----- test/perf/libperf.h | 2 +- test/perf/perftest.c | 20 ++-- test/test.c | 26 ------ 27 files changed, 494 insertions(+), 217 deletions(-) create mode 100644 src/uct/ib/rc/rc_def.h delete mode 100644 test/test.c diff --git a/src/uct/Makefile.am b/src/uct/Makefile.am index 5fb9271f732..74f141730c1 100644 --- a/src/uct/Makefile.am +++ b/src/uct/Makefile.am @@ -49,6 +49,7 @@ endif if HAVE_TL_RC noinst_HEADERS += \ + ib/rc/rc_def.h \ ib/rc/rc_ep.h \ ib/rc/rc_iface.h diff --git a/src/uct/api/tl.h b/src/uct/api/tl.h index 7c4b9a264f5..643b8f071af 100644 --- a/src/uct/api/tl.h +++ b/src/uct/api/tl.h @@ -24,8 +24,8 @@ * Communication resource. */ typedef struct uct_resource_desc { - char tl_name[UCT_MAX_NAME_LEN]; /* Transport name */ - char hw_name[UCT_MAX_NAME_LEN]; /* Hardware resource name */ + char tl_name[UCT_MAX_NAME_LEN]; /* Transport name */ + char dev_name[UCT_MAX_NAME_LEN]; /* Hardware device name */ uint64_t latency; /* Latency, nanoseconds */ size_t bandwidth; /* Bandwidth, bytes/second */ cpu_set_t local_cpus; /* Mask of CPUs near the resource */ @@ -48,6 +48,12 @@ struct uct_ep_addr { typedef void (*uct_completion_cb_t)(uct_req_h req, ucs_status_t status); +typedef struct uct_callback uct_callback_t; +struct uct_callback { + void (*cb)(uct_callback_t *self, ucs_status_t status); +}; + + /** * Remote key release function. */ @@ -92,7 +98,7 @@ typedef struct uct_tl_ops { uct_resource_desc_t **resources_p, unsigned *num_resources_p); - ucs_status_t (*iface_open)(uct_context_h context, const char *hw_name, + ucs_status_t (*iface_open)(uct_context_h context, const char *dev_name, uct_iface_h *iface_p); ucs_status_t (*rkey_unpack)(uct_context_h context, void *rkey_buffer, @@ -146,8 +152,8 @@ typedef struct uct_iface_ops { uct_ep_addr_t *ep_addr); ucs_status_t (*ep_put_short)(uct_ep_h ep, void *buffer, unsigned length, - uint64_t remote_addr, uct_rkey_t rkey, - uct_req_h *req_p, uct_completion_cb_t cb); + uint64_t remote_addr, uct_rkey_t rkey); + } uct_iface_ops_t; @@ -156,6 +162,7 @@ typedef struct uct_iface_ops { */ typedef struct uct_pd { uct_pd_ops_t *ops; + uct_context_h context; } uct_pd_t; diff --git a/src/uct/api/uct.h b/src/uct/api/uct.h index c5a4f623f0b..4d303eae48a 100644 --- a/src/uct/api/uct.h +++ b/src/uct/api/uct.h @@ -32,6 +32,15 @@ ucs_status_t uct_init(uct_context_h *context_p); void uct_cleanup(uct_context_h context); +/** + * @ingroup CONTEXT + * @brief Progress all communications of the context. + * + * @param [in] context Handle to context. + */ +void uct_progress(uct_context_h context); + + /** * @ingroup CONTEXT * @brief Query for transport resources. @@ -63,13 +72,13 @@ void uct_release_resource_list(uct_resource_desc_t *resources); * * @param [in] context Handle to context. * @param [in] tl_name Transport name. - * @param [in] hw_name Hardware resource name, + * @param [in] dev_name Hardware device name, * @param [out] iface_p Filled with a handle to opened communication interface. * * @return Error code. */ ucs_status_t uct_iface_open(uct_context_h context, const char *tl_name, - const char *hw_name, uct_iface_h *iface_p); + const char *dev_name, uct_iface_h *iface_p); /** @@ -169,10 +178,9 @@ static inline ucs_status_t uct_ep_connect_to_ep(uct_ep_h ep, uct_iface_addr_t *i } static inline ucs_status_t uct_ep_put_short(uct_ep_h ep, void *buffer, unsigned length, - uint64_t remote_addr, uct_rkey_t rkey, - uct_req_h *req_p, uct_completion_cb_t cb) + uint64_t remote_addr, uct_rkey_t rkey) { - return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey, req_p, cb); + return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey); } #endif diff --git a/src/uct/ib/base/ib_context.c b/src/uct/ib/base/ib_context.c index 50c3b86f3ea..58d1b4f868a 100644 --- a/src/uct/ib/base/ib_context.c +++ b/src/uct/ib/base/ib_context.c @@ -109,7 +109,7 @@ ucs_status_t uct_ib_init(uct_context_h context) /* TODO apply a user-defined regex/wildcard filter */ ibctx->num_devices = 0; for (i = 0; i < num_devices; ++i) { - status = uct_ib_device_create(device_list[i], + status = uct_ib_device_create(context, device_list[i], &ibctx->devices[ibctx->num_devices]); if (status != UCS_OK) { ucs_warn("Failed to initialize %s (%s), ignoring it", diff --git a/src/uct/ib/base/ib_device.c b/src/uct/ib/base/ib_device.c index 21b51b1187b..5cab3228e49 100644 --- a/src/uct/ib/base/ib_device.c +++ b/src/uct/ib/base/ib_device.c @@ -127,7 +127,9 @@ uct_pd_ops_t uct_ib_pd_ops = { .rkey_pack = uct_ib_rkey_pack, }; -ucs_status_t uct_ib_device_create(struct ibv_device *ibv_device, uct_ib_device_t **dev_p) +ucs_status_t uct_ib_device_create(uct_context_h context, + struct ibv_device *ibv_device, + uct_ib_device_t **dev_p) { struct ibv_context *ibv_context; struct ibv_exp_device_attr dev_attr; @@ -178,11 +180,12 @@ ucs_status_t uct_ib_device_create(struct ibv_device *ibv_device, uct_ib_device_t } /* Save device information */ - dev->super.ops = &uct_ib_pd_ops; - dev->ibv_context = ibv_context; - dev->dev_attr = dev_attr; - dev->first_port = first_port; - dev->num_ports = num_ports; + dev->super.ops = &uct_ib_pd_ops; + dev->super.context = context; + dev->ibv_context = ibv_context; + dev->dev_attr = dev_attr; + dev->first_port = first_port; + dev->num_ports = num_ports; /* Get device locality */ uct_ib_device_get_affinity(ibv_get_device_name(ibv_device), &dev->local_cpus); @@ -281,7 +284,7 @@ ucs_status_t uct_ib_device_port_get_resource(uct_ib_device_t *dev, uint8_t port_ int ret; /* HCA:Port is the hardware resource name */ - ucs_snprintf_zero(resource->hw_name, sizeof(resource->hw_name), "%s:%d", + ucs_snprintf_zero(resource->dev_name, sizeof(resource->dev_name), "%s:%d", uct_ib_device_name(dev), port_num); /* Port network address */ diff --git a/src/uct/ib/base/ib_device.h b/src/uct/ib/base/ib_device.h index d83e5397ffe..80fd6fe4574 100644 --- a/src/uct/ib/base/ib_device.h +++ b/src/uct/ib/base/ib_device.h @@ -23,11 +23,15 @@ struct uct_ib_device { uint8_t first_port; /* Number of first port (usually 1) */ uint8_t num_ports; /* Amount of physical ports */ cpu_set_t local_cpus; /* CPUs local to device */ + pthread_t async_thread; /* Async event thread */ + int stop_thread; struct ibv_exp_port_attr port_attr[0]; /* Cached port attributes */ }; -ucs_status_t uct_ib_device_create(struct ibv_device *ibv_device, uct_ib_device_t **dev_p); +ucs_status_t uct_ib_device_create(uct_context_h context, + struct ibv_device *ibv_device, + uct_ib_device_t **dev_p); void uct_ib_device_destroy(uct_ib_device_t *dev); diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 4a78fc4fbbd..ec167306032 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -16,25 +16,27 @@ static ucs_status_t uct_ib_iface_find_port(uct_ib_context_t *ibctx, uct_ib_iface_t *iface, - const char *hw_name) + const char *dev_name) { uct_ib_device_t *dev; - const char *dev_name; + const char *ibdev_name; unsigned port_num; unsigned dev_index; size_t devname_len; char *p; - p = strrchr(hw_name, ':'); + p = strrchr(dev_name, ':'); if (p == NULL) { - return UCS_ERR_INVALID_PARAM; /* Wrong hw_name format */ + return UCS_ERR_INVALID_PARAM; /* Wrong dev_name format */ } - devname_len = p - hw_name; + devname_len = p - dev_name; for (dev_index = 0; dev_index < ibctx->num_devices; ++dev_index) { dev = ibctx->devices[dev_index]; - dev_name = uct_ib_device_name(dev); - if ((strlen(dev_name) == devname_len) && !strncmp(dev_name, hw_name, devname_len)) { + ibdev_name = uct_ib_device_name(dev); + if ((strlen(ibdev_name) == devname_len) && + !strncmp(ibdev_name, dev_name, devname_len)) + { port_num = strtod(p + 1, &p); if (*p != '\0') { return UCS_ERR_INVALID_PARAM; /* Failed to parse port number */ @@ -54,14 +56,14 @@ static ucs_status_t uct_ib_iface_find_port(uct_ib_context_t *ibctx, } ucs_status_t ucs_ib_iface_init(uct_context_h context, uct_ib_iface_t *iface, - const char *hw_name) + const char *dev_name) { uct_ib_context_t *ibctx = ucs_component_get(context, ib, uct_ib_context_t); struct ibv_exp_port_attr *port_attr; uct_ib_device_t *dev; ucs_status_t status; - status = uct_ib_iface_find_port(ibctx, iface, hw_name); + status = uct_ib_iface_find_port(ibctx, iface, dev_name); if (status != UCS_OK) { goto err; } diff --git a/src/uct/ib/base/ib_iface.h b/src/uct/ib/base/ib_iface.h index b1df026be0c..77c9ce741f7 100644 --- a/src/uct/ib/base/ib_iface.h +++ b/src/uct/ib/base/ib_iface.h @@ -36,7 +36,7 @@ typedef struct uct_ib_iface { ucs_status_t ucs_ib_iface_init(uct_context_h context, uct_ib_iface_t *iface, - const char *hw_name); + const char *dev_name); void ucs_ib_iface_cleanup(uct_ib_iface_t *iface); static inline uct_ib_device_t * uct_ib_iface_device(uct_ib_iface_t *iface) diff --git a/src/uct/ib/base/ib_verbs.h b/src/uct/ib/base/ib_verbs.h index e95fe3462ce..57737fdc3f7 100644 --- a/src/uct/ib/base/ib_verbs.h +++ b/src/uct/ib/base/ib_verbs.h @@ -82,6 +82,13 @@ static inline struct ibv_mr *ibv_exp_reg_mr(struct ibv_exp_reg_mr_in *in) # define IBV_SHARED_MR_ACCESS_FLAGS(_shared_mr) ((_shared_mr)->exp_access) # define IBV_EXP_DEVICE_ATTR_SET_COMP_MASK(_attr) # define IBV_EXP_PORT_ATTR_SET_COMP_MASK(_attr) + +static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq) +{ + errno = ENOSYS; + return -1; +} + #else # define IBV_IS_MPAGES_AVAIL(_attr) ((_attr)->device_cap_flags2 & IBV_EXP_DEVICE_MR_ALLOCATE) # define IBV_DEVICE_HAS_DC(_attr) ((_attr)->exp_device_cap_flags & IBV_EXP_DEVICE_DC_TRANSPORT) @@ -89,6 +96,15 @@ static inline struct ibv_mr *ibv_exp_reg_mr(struct ibv_exp_reg_mr_in *in) # define IBV_SHARED_MR_ACCESS_FLAGS(_shared_mr) ((_shared_mr)->access) # define IBV_EXP_DEVICE_ATTR_SET_COMP_MASK(_attr) (_attr)->comp_mask = (IBV_EXP_DEVICE_ATTR_RESERVED - 1) # define IBV_EXP_PORT_ATTR_SET_COMP_MASK(_attr) (_attr)->comp_mask = 0 + +static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq) +{ + struct ibv_exp_cq_attr cq_attr = {0}; + cq_attr.comp_mask = IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS; + cq_attr.cq_cap_flags = IBV_EXP_CQ_IGNORE_OVERRUN; + return ibv_exp_modify_cq(cq, &cq_attr, IBV_EXP_CQ_CAP_FLAGS); +} + #endif #endif diff --git a/src/uct/ib/mlx5/ib_mlx5.c b/src/uct/ib/mlx5/ib_mlx5.c index 1177a1488ef..5860716f605 100644 --- a/src/uct/ib/mlx5/ib_mlx5.c +++ b/src/uct/ib/mlx5/ib_mlx5.c @@ -7,6 +7,7 @@ #include "ib_mlx5.h" +#include #include #include @@ -16,6 +17,8 @@ ucs_status_t uct_ib_mlx5_get_qp_info(struct ibv_qp *qp, uct_ib_mlx5_qp_info_t *q struct mlx5_qp *mqp = ucs_container_of(qp, struct mlx5_qp, verbs_qp.qp); if ((mqp->sq.cur_post != 0) || (mqp->rq.head != 0) || mqp->bf->need_lock) { + ucs_warn("cur_post=%d head=%d need_lock=%d", mqp->sq.cur_post, + mqp->rq.head, mqp->bf->need_lock); return UCS_ERR_NO_DEVICE; } diff --git a/src/uct/ib/mlx5/ib_mlx5.h b/src/uct/ib/mlx5/ib_mlx5.h index 55ba8141a48..02e8d16dfff 100644 --- a/src/uct/ib/mlx5/ib_mlx5.h +++ b/src/uct/ib/mlx5/ib_mlx5.h @@ -63,4 +63,14 @@ void uct_ib_mlx5_update_cq_ci(struct ibv_cq *cq, unsigned cq_ci); void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av); +static inline int uct_ib_mlx5_cqe_hw_owned(struct mlx5_cqe64 *cqe, unsigned index, + unsigned cq_length) +{ + uint8_t op_own = cqe->op_own; + + return ((op_own & MLX5_CQE_OWNER_MASK) == !(index & cq_length) || + (op_own & 0xF0) == (MLX5_CQE_INVALID << 4)); +} + + #endif diff --git a/src/uct/ib/rc/rc_def.h b/src/uct/ib/rc/rc_def.h new file mode 100644 index 00000000000..8c2b7dedc5f --- /dev/null +++ b/src/uct/ib/rc/rc_def.h @@ -0,0 +1,20 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* $COPYRIGHT$ +* $HEADER$ +*/ + +#ifndef RC_DEF_H_ +#define RC_DEF_H_ + + +#define UCT_RC_TX_QP_LEN 128 +#define UCT_RC_QP_HASH_SIZE 256 + +typedef struct uct_rc_ep uct_rc_ep_t; +typedef struct uct_rc_iface uct_rc_iface_t; +typedef struct uct_rc_ep_addr uct_rc_ep_addr_t; + + +#endif diff --git a/src/uct/ib/rc/rc_ep.c b/src/uct/ib/rc/rc_ep.c index a4f774ae081..408d784f283 100644 --- a/src/uct/ib/rc/rc_ep.c +++ b/src/uct/ib/rc/rc_ep.c @@ -25,7 +25,7 @@ ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep) qp_init_attr.send_cq = iface->super.send_cq; qp_init_attr.recv_cq = iface->super.recv_cq; qp_init_attr.srq = NULL; /* TODO */ - qp_init_attr.cap.max_send_wr = 1024; + qp_init_attr.cap.max_send_wr = UCT_RC_TX_QP_LEN; qp_init_attr.cap.max_recv_wr = 1024; qp_init_attr.cap.max_send_sge = 2; qp_init_attr.cap.max_recv_sge = 1; @@ -40,6 +40,8 @@ ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep) goto err; } + ep->qp_num = ep->qp->qp_num; + uct_rc_iface_add_ep(iface, ep); return UCS_OK; err: @@ -48,8 +50,11 @@ ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep) void uct_rc_ep_cleanup(uct_rc_ep_t *ep) { + uct_rc_iface_t *iface = ucs_derived_of(ep->super.iface, uct_rc_iface_t); int ret; + uct_rc_iface_remove_ep(iface, ep); + ret = ibv_destroy_qp(ep->qp); if (ret != 0) { ucs_warn("ibv_destroy_qp() returned %d: %m", ret); diff --git a/src/uct/ib/rc/rc_ep.h b/src/uct/ib/rc/rc_ep.h index 6d81e632024..1d6ac0bc2fa 100644 --- a/src/uct/ib/rc/rc_ep.h +++ b/src/uct/ib/rc/rc_ep.h @@ -8,21 +8,23 @@ #ifndef UCT_RC_EP_H #define UCT_RC_EP_H -#include "rc_iface.h" +#include "rc_def.h" #include -typedef struct uct_rc_ep_addr { +struct uct_rc_ep_addr { uct_ep_addr_t super; uint32_t qp_num; -} uct_rc_ep_addr_t; +}; -typedef struct uct_rc_ep { +struct uct_rc_ep { uct_ep_t super; struct ibv_qp *qp; -} uct_rc_ep_t; + unsigned qp_num; + uct_rc_ep_t *next; +}; ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep); diff --git a/src/uct/ib/rc/rc_iface.c b/src/uct/ib/rc/rc_iface.c index 62f7a1896c0..2b85e0ace1c 100644 --- a/src/uct/ib/rc/rc_iface.c +++ b/src/uct/ib/rc/rc_iface.c @@ -12,49 +12,6 @@ #include -ucs_status_t uct_rc_iface_open(uct_context_h context, const char *hw_name, - uct_iface_h *iface_p) -{ - ucs_status_t status; - uct_rc_iface_t *iface; - - iface = ucs_malloc(sizeof(*iface), "rc iface"); - if (iface == NULL) { - return UCS_ERR_NO_MEMORY; - } - - iface->super.super.ops.iface_close = uct_rc_iface_close; - iface->super.super.ops.iface_get_address = uct_rc_iface_get_address; - iface->super.super.ops.iface_flush = uct_rc_iface_flush; - iface->super.super.ops.ep_get_address = uct_rc_ep_get_address; - iface->super.super.ops.ep_connect_to_iface = NULL; - iface->super.super.ops.ep_connect_to_ep = uct_rc_ep_connect_to_ep; - - status = ucs_ib_iface_init(context, &iface->super, hw_name); - if (status != UCS_OK) { - goto err_free; - } - - ucs_debug("opened RC dev %s port %d", - uct_ib_device_name(uct_ib_iface_device(&iface->super)), - iface->super.port_num); - - *iface_p = &iface->super.super; - return UCS_OK; - -err_free: - ucs_free(iface); - return status; -} - -void uct_rc_iface_close(uct_iface_h tl_iface) -{ - uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); - - ucs_ib_iface_cleanup(&iface->super); - ucs_free(iface); -} - void uct_rc_iface_query(uct_rc_iface_t *iface, uct_iface_attr_t *iface_attr) { iface_attr->max_short = 0; @@ -73,8 +30,34 @@ ucs_status_t uct_rc_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *if return UCS_OK; } -ucs_status_t uct_rc_iface_flush(uct_iface_h tl_iface, uct_req_h *req_p, - uct_completion_cb_t cb) +static inline int uct_rc_ep_compare(uct_rc_ep_t *ep1, uct_rc_ep_t *ep2) { - return UCS_OK; + return (int32_t)ep1->qp_num - (int32_t)ep2->qp_num; +} + +static inline unsigned uct_rc_ep_hash(uct_rc_ep_t *ep) +{ + return ep->qp_num; +} + +SGLIB_DEFINE_LIST_PROTOTYPES(uct_rc_ep_t, mxm_rc_ep_compare, next); +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_rc_ep_t, UCT_RC_QP_HASH_SIZE, mxm_rc_ep_hash); +SGLIB_DEFINE_LIST_FUNCTIONS(uct_rc_ep_t, uct_rc_ep_compare, next); +SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(uct_rc_ep_t, UCT_RC_QP_HASH_SIZE, uct_rc_ep_hash); + +uct_rc_ep_t *uct_rc_iface_lookup_ep(uct_rc_iface_t *iface, unsigned qp_num) +{ + uct_rc_ep_t tmp; + tmp.qp_num = qp_num; + return sglib_hashed_uct_rc_ep_t_find_member(iface->eps, &tmp); +} + +void uct_rc_iface_add_ep(uct_rc_iface_t *iface, uct_rc_ep_t *ep) +{ + sglib_hashed_uct_rc_ep_t_add(iface->eps, ep); +} + +void uct_rc_iface_remove_ep(uct_rc_iface_t *iface, uct_rc_ep_t *ep) +{ + sglib_hashed_uct_rc_ep_t_delete(iface->eps, ep); } diff --git a/src/uct/ib/rc/rc_iface.h b/src/uct/ib/rc/rc_iface.h index 7fe0e5952da..d4e3d1a6fbc 100644 --- a/src/uct/ib/rc/rc_iface.h +++ b/src/uct/ib/rc/rc_iface.h @@ -8,18 +8,17 @@ #ifndef UCT_RC_IFACE_H #define UCT_RC_IFACE_H +#include "rc_ep.h" + #include +#include -typedef struct uct_rc_iface { +struct uct_rc_iface { uct_ib_iface_t super; -} uct_rc_iface_t; - - -ucs_status_t uct_rc_iface_open(uct_context_h context, const char *hw_name, - uct_iface_h *iface_p); + uct_rc_ep_t *eps[UCT_RC_QP_HASH_SIZE]; +}; -void uct_rc_iface_close(uct_iface_h tl_iface); void uct_rc_iface_query(uct_rc_iface_t *iface, uct_iface_attr_t *iface_attr); @@ -28,7 +27,9 @@ ucs_status_t uct_rc_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *if ucs_status_t uct_rc_iface_flush(uct_iface_h iface, uct_req_h *req_p, uct_completion_cb_t cb); -ucs_status_t uct_rc_iface_flush(uct_iface_h iface, uct_req_h *req_p, - uct_completion_cb_t cb); +uct_rc_ep_t *uct_rc_iface_lookup_ep(uct_rc_iface_t *iface, unsigned qp_num); + +void uct_rc_iface_add_ep(uct_rc_iface_t *iface, uct_rc_ep_t *ep); +void uct_rc_iface_remove_ep(uct_rc_iface_t *iface, uct_rc_ep_t *ep); #endif diff --git a/src/uct/ib/rc/rc_mlx5.c b/src/uct/ib/rc/rc_mlx5.c index 894cd7bc558..c763612f46d 100644 --- a/src/uct/ib/rc/rc_mlx5.c +++ b/src/uct/ib/rc/rc_mlx5.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,7 @@ static ucs_status_t uct_rc_mlx5_ep_create(uct_iface_h tl_iface, uct_ep_h *ep_p) ep->tx.qend = qp_info.sq.buf + (MLX5_SEND_WQE_BB * qp_info.sq.wqe_cnt); ep->tx.seg = ep->tx.qstart; ep->tx.sw_pi = 0; - ep->tx.hw_ci = 0; + ep->tx.max_pi = UCT_RC_TX_QP_LEN; ep->tx.bf_reg = qp_info.bf.reg; ep->tx.bf_size = qp_info.bf.size; ep->tx.dbrec = &qp_info.dbrec[MLX5_SND_DBR]; @@ -100,23 +101,28 @@ static void uct_rc_mlx5_ep_destroy(uct_ep_h tl_ep) static ucs_status_t uct_rc_mlx5_ep_put_short(uct_ep_h tl_ep, void *buffer, unsigned length, uint64_t remote_addr, - uct_rkey_t rkey, uct_req_h *req_p, - uct_completion_cb_t cb) + uct_rkey_t rkey) { uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t); uct_ib_mlx5_wqe_rc_rdma_inl_seg_t *seg = ep->tx.seg; unsigned wqe_size; uint32_t sw_pi_16_n; - uint64_t *src, *dst; - unsigned i; + uint64_t *src, *dst, *end; + unsigned sw_pi, i; - sw_pi_16_n = htonl(ep->tx.sw_pi & 0xffff); - wqe_size = ((length + 15) / 16) + (sizeof(*seg) / 16); - ucs_assert(wqe_size < MLX5_SEND_WQE_BB); + sw_pi = ep->tx.sw_pi; + if (UCS_CIRCULAR_COMPARE32(sw_pi, >=, ep->tx.max_pi)) { + return UCS_ERR_WOULD_BLOCK; + } + + sw_pi_16_n = htonl(sw_pi & 0xffff); + wqe_size = (length + sizeof(*seg) + 15) / 16; + ucs_assert(wqe_size <= MLX5_SEND_WQE_BB); /* Build WQE */ seg->ctrl.opmod_idx_opcode = (sw_pi_16_n >> 8) | (MLX5_OPCODE_RDMA_WRITE << 24); seg->ctrl.qpn_ds = htonl(wqe_size) | ep->qpn_ds; + seg->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; /* Ask for completion */ seg->raddr.raddr = htonll(remote_addr); seg->raddr.rkey = (uint32_t)rkey; @@ -132,28 +138,69 @@ static ucs_status_t uct_rc_mlx5_ep_put_short(uct_ep_h tl_ep, void *buffer, /* Make sure that doorbell record is written before ringing the doorbell */ ucs_memory_bus_store_fence(); - /* BF copy */ + /* Set up copy pointers */ dst = ep->tx.bf_reg; src = (void*)seg; - for (i = 0; i < MLX5_SEND_WQE_BB / sizeof(*dst); ++i) { - *dst++ = *src++; - } + end = (void*)seg + sizeof(*seg) + length; - ucs_memory_bus_store_fence(); + /* BF copy */ + do { + for (i = 0; i < MLX5_SEND_WQE_BB / sizeof(*dst); ++i) { + *dst++ = *src++; + } + } while (src < end); /* Flip BF register */ ep->tx.bf_reg = (void*) ((uintptr_t) ep->tx.bf_reg ^ ep->tx.bf_size); + + /* Completion counters */ ++ep->tx.sw_pi; + ++ucs_derived_of(ep->super.super.iface, uct_rc_mlx5_iface_t)->tx.outstanding; /* Advance queue pointer */ - ep->tx.seg += MLX5_SEND_WQE_BB; - if (ep->tx.seg == ep->tx.qend) { + if (ucs_unlikely((ep->tx.seg += MLX5_SEND_WQE_BB) >= ep->tx.qend)) { ep->tx.seg = ep->tx.qstart; } return UCS_OK; } +static void uct_rc_mlx5_iface_progress(void *arg) +{ + uct_rc_mlx5_iface_t *iface = arg; + struct mlx5_cqe64 *cqe; + uct_rc_mlx5_ep_t *ep; + unsigned index, qp_num; + + index = iface->tx.cq_ci; + cqe = iface->tx.cq_buf + (index & (iface->tx.cq_length - 1)) * sizeof(struct mlx5_cqe64); + if (uct_ib_mlx5_cqe_hw_owned(cqe, index, iface->tx.cq_length)) { + return; /* CQ is empty */ + } + iface->tx.cq_ci = index + 1; + --iface->tx.outstanding; + + ucs_memory_cpu_load_fence(); + + qp_num = ntohl(cqe->sop_drop_qpn) & 0xffffff; + ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, qp_num), uct_rc_mlx5_ep_t); + ucs_assert(ep != NULL); + + ++ep->tx.max_pi; +} + +static ucs_status_t uct_rc_mlx5_iface_flush(uct_iface_h tl_iface, uct_req_h *req_p, + uct_completion_cb_t cb) +{ + uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_t); + + if (iface->tx.outstanding > 0) { + return UCS_ERR_WOULD_BLOCK; + } + + return UCS_OK; +} + static ucs_status_t uct_rc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) { uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); @@ -163,24 +210,74 @@ static ucs_status_t uct_rc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr return UCS_OK; } +static void uct_rc_mlx5_iface_close(uct_iface_h tl_iface) +{ + uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_t); + uct_context_h context = iface->super.super.super.pd->context; + + ucs_notifier_chain_remove(&context->progress_chain, uct_rc_mlx5_iface_progress, iface); + ucs_ib_iface_cleanup(&iface->super.super); + ucs_free(iface); +} + static ucs_status_t uct_rc_mlx5_iface_open(uct_context_h context, - const char *hw_name, + const char *dev_name, uct_iface_h *iface_p) { + uct_ib_mlx5_cq_info_t cq_info; + uct_rc_mlx5_iface_t *iface; ucs_status_t status; - uct_iface_h iface; + int ret; + + iface = ucs_malloc(sizeof(*iface), "rc mlx5 iface"); + if (iface == NULL) { + return UCS_ERR_NO_MEMORY; + } + + iface->super.super.super.ops.iface_close = uct_rc_mlx5_iface_close; + iface->super.super.super.ops.iface_get_address = uct_rc_iface_get_address; + iface->super.super.super.ops.iface_flush = uct_rc_mlx5_iface_flush; + iface->super.super.super.ops.ep_get_address = uct_rc_ep_get_address; + iface->super.super.super.ops.ep_connect_to_iface = NULL; + iface->super.super.super.ops.ep_connect_to_ep = uct_rc_ep_connect_to_ep; + iface->super.super.super.ops.iface_query = uct_rc_mlx5_iface_query; + iface->super.super.super.ops.ep_put_short = uct_rc_mlx5_ep_put_short; + iface->super.super.super.ops.ep_create = uct_rc_mlx5_ep_create; + iface->super.super.super.ops.ep_destroy = uct_rc_mlx5_ep_destroy; + + status = ucs_ib_iface_init(context, &iface->super.super, dev_name); + if (status != UCS_OK) { + goto err_free; + } + + ret = ibv_exp_cq_ignore_overrun(iface->super.super.send_cq); + if (ret != 0) { + ucs_error("Failed to modify send CQ to ignore overrun: %s", strerror(ret)); + status = UCS_ERR_UNSUPPORTED; + goto err_cleanup_ib_ep; + } - status = uct_rc_iface_open(context, hw_name, &iface); + status = uct_ib_mlx5_get_cq_info(iface->super.super.send_cq, &cq_info); if (status != UCS_OK) { - return status; + ucs_error("Failed to get mlx5 CQ information"); + goto err_cleanup_ib_ep; } - iface->ops.iface_query = uct_rc_mlx5_iface_query; - iface->ops.ep_put_short = uct_rc_mlx5_ep_put_short; - iface->ops.ep_create = uct_rc_mlx5_ep_create; - iface->ops.ep_destroy = uct_rc_mlx5_ep_destroy; - *iface_p = iface; + iface->tx.cq_buf = cq_info.buf; + iface->tx.cq_ci = 0; + iface->tx.cq_length = cq_info.cqe_cnt; + iface->tx.outstanding = 0; + + ucs_notifier_chain_add(&context->progress_chain, uct_rc_mlx5_iface_progress, + iface); + + *iface_p = &iface->super.super.super; return UCS_OK; + err_cleanup_ib_ep: + +err_free: + ucs_free(iface); + return status; } uct_tl_ops_t uct_rc_mlx5_tl_ops = { diff --git a/src/uct/ib/rc/rc_mlx5.h b/src/uct/ib/rc/rc_mlx5.h index 78699c4457c..03a114d7e33 100644 --- a/src/uct/ib/rc/rc_mlx5.h +++ b/src/uct/ib/rc/rc_mlx5.h @@ -10,13 +10,14 @@ #include "rc_ep.h" + typedef struct { uct_rc_ep_t super; unsigned qpn_ds; struct { unsigned sw_pi; - unsigned hw_ci; + unsigned max_pi; void *seg; void *bf_reg; unsigned long bf_size; @@ -27,5 +28,15 @@ typedef struct { } uct_rc_mlx5_ep_t; +typedef struct { + uct_rc_iface_t super; + struct { + void *cq_buf; + unsigned cq_ci; + unsigned cq_length; + unsigned outstanding; + } tx; +} uct_rc_mlx5_iface_t; + #endif diff --git a/src/uct/tl/context.c b/src/uct/tl/context.c index 5df5b80f513..a23341aaa9d 100644 --- a/src/uct/tl/context.c +++ b/src/uct/tl/context.c @@ -48,6 +48,11 @@ void uct_cleanup(uct_context_h context) ucs_free(context); } +void uct_progress(uct_context_h context) +{ + ucs_notifier_chain_call(&context->progress_chain); +} + ucs_status_t uct_register_tl(uct_context_h context, const char *tl_name, uct_tl_ops_t *tl_ops) { @@ -120,13 +125,13 @@ void uct_release_resource_list(uct_resource_desc_t *resources) } ucs_status_t uct_iface_open(uct_context_h context, const char *tl_name, - const char *hw_name, uct_iface_h *iface_p) + const char *dev_name, uct_iface_h *iface_p) { uct_context_tl_info_t *tl; for (tl = context->tls; tl < context->tls + context->num_tls; ++tl) { if (!strcmp(tl_name, tl->name)) { - return tl->ops->iface_open(context, hw_name, iface_p); + return tl->ops->iface_open(context, dev_name, iface_p); } } diff --git a/test/gtest/Makefile.am b/test/gtest/Makefile.am index 279fc54c2c9..b3e14c1d190 100644 --- a/test/gtest/Makefile.am +++ b/test/gtest/Makefile.am @@ -57,23 +57,23 @@ gtest_SOURCES = \ ucs/test_twheel.cc \ ucs/test_frag_list.cc -.PHONY: test test gdb valgrind fix_rpath uct +.PHONY: test test gdb valgrind fix_rpath ucx all-local: gtest -uct: - $(MAKE) -C $(top_builddir)/src/uct +ucx: + $(MAKE) -C $(top_builddir) -test: uct gtest +test: ucx gtest @rm -f core.* $(abs_builddir)/gtest --gtest_filter=$(GTEST_FILTER) -test_gdb: uct gtest +test_gdb: ucx gtest echo 'r' > .gdbcommands env UCS_HANDLE_ERRORS=none UCS_GDB_PATH="" gdb -x .gdbcommands --args $(GDB_ARGS) $(abs_builddir)/gtest --gtest_filter=$(GTEST_FILTER) list: gtest $(abs_builddir)/gtest --gtest_list_tests --gtest_filter=$(GTEST_FILTER) -test_valgrind: uct gtest +test_valgrind: ucx gtest env LD_LIBRARY_PATH="$(VALGRIND_LIBPATH):${LD_LIBRARY_PATH}" valgrind $(VALGRIND_ARGS) $(VALGRIND_EXTRA_ARGS) $(abs_builddir)/gtest --gtest_filter=$(GTEST_FILTER) diff --git a/test/gtest/uct/test_uct_context.cc b/test/gtest/uct/test_uct_context.cc index 208626204e5..0c75bd05b1c 100644 --- a/test/gtest/uct/test_uct_context.cc +++ b/test/gtest/uct/test_uct_context.cc @@ -32,11 +32,11 @@ UCS_TEST_F(test_uct, query_resources) { for (unsigned i = 0; i < num_resources; ++i) { uct_resource_desc_t *res = &resources[i]; EXPECT_TRUE(strcmp(res->tl_name, "")); - EXPECT_TRUE(strcmp(res->hw_name, "")); + EXPECT_TRUE(strcmp(res->dev_name, "")); EXPECT_GT(res->latency, (uint64_t)0); EXPECT_GT(res->bandwidth, (size_t)0); UCS_TEST_MESSAGE << i << ": " << res->tl_name << - " on " << res->hw_name << + " on " << res->dev_name << " at " << (res->bandwidth / 1024.0 / 1024.0) << " MB/sec"; } @@ -61,7 +61,7 @@ UCS_TEST_F(test_uct, open_iface) { for (unsigned i = 0; i < num_resources; ++i) { uct_iface_h iface = NULL; - status = uct_iface_open(ucth, resources[i].tl_name, resources[i].hw_name, + status = uct_iface_open(ucth, resources[i].tl_name, resources[i].dev_name, &iface); ASSERT_TRUE(iface != NULL); ASSERT_UCS_OK(status); @@ -151,8 +151,7 @@ class entity { void put8(uint64_t value, uint64_t address, uct_rkey_t rkey) { ucs_status_t status; - status = uct_ep_put_short(m_ep, &value, sizeof(value), address, rkey, - NULL, NULL); + status = uct_ep_put_short(m_ep, &value, sizeof(value), address, rkey); ASSERT_UCS_OK(status); } diff --git a/test/gtest/uct/test_uct_perf.cc b/test/gtest/uct/test_uct_perf.cc index 84ab0c6f59c..5227d31f3de 100644 --- a/test/gtest/uct/test_uct_perf.cc +++ b/test/gtest/uct/test_uct_perf.cc @@ -14,6 +14,7 @@ extern "C" { #include #include #include +#include class test_rte_comm { @@ -53,6 +54,10 @@ class test_rte { m_index(index), m_send(send), m_recv(recv) { } + unsigned index() const { + return m_index; + } + static unsigned group_size(void *rte_group) { return 2; @@ -61,7 +66,7 @@ class test_rte { static unsigned group_index(void *rte_group) { test_rte *self = reinterpret_cast(rte_group); - return self->m_index; + return self->index(); } static void barrier(void *rte_group) @@ -123,11 +128,14 @@ class test_uct_perf : public ucs::test { struct test_spec { const char *title; const char *units; + double min; + double max; ucx_perf_cmd_t command; ucx_perf_test_type_t test_type; size_t msglen; - double norm; + size_t iters; size_t field_offset; + double norm; }; test_uct_perf() { @@ -136,30 +144,42 @@ class test_uct_perf : public ucs::test { void init() { ucs::test::init(); - sched_getaffinity(getpid(), sizeof(m_orig_affinity), &m_orig_affinity); + } - const int max_cpus = sysconf(_SC_NPROCESSORS_CONF); - int num_cpus = 0, first_cpu = -1; + static void set_affinity(unsigned index) { + std::vector cpus; + cpu_set_t affinity; + unsigned my_cpu; + int ret, nr_cpus; - for (int cpu = 0; cpu < sysconf(_SC_NPROCESSORS_CONF); ++cpu) { - if (CPU_ISSET(cpu, &m_orig_affinity)) { - ++num_cpus; - first_cpu = cpu; - } + ret = sched_getaffinity(getpid(), sizeof(m_orig_affinity), &affinity); + if (ret != 0) { + ucs_error("Failed to get CPU affinity: %m"); + throw ucs::test_abort_exception(); } - ucs_assert_always(num_cpus > 0 && first_cpu != -1); + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus < 0) { + ucs_error("Failed to get CPU count: %m"); + throw ucs::test_abort_exception(); + } - if (num_cpus < 2) { - unsigned next_cpu = (first_cpu + 1) % max_cpus; - UCS_TEST_MESSAGE << "Changing CPU affinity to " << first_cpu << - "," << next_cpu; + for (int cpu = 0; cpu < nr_cpus; ++cpu) { + if (CPU_ISSET(cpu, &affinity)) { + cpus.push_back(cpu); + } + } - cpu_set_t affinity = m_orig_affinity; - CPU_SET(next_cpu , &affinity); - sched_setaffinity(getpid(), sizeof(affinity), &affinity); + if (index < cpus.size()) { + my_cpu = cpus[index]; + } else { + my_cpu = cpus.back() + (index - cpus.size()) + 1; } + + CPU_ZERO(&affinity); + CPU_SET(my_cpu , &affinity); + sched_setaffinity(ucs_get_tid(), sizeof(affinity), &affinity); } void cleanup() { @@ -176,6 +196,7 @@ class test_uct_perf : public ucs::test { status = uct_init(&ucth); ASSERT_UCS_OK(status); + set_affinity(reinterpret_cast(params->rte_group)->index()); result = new ucx_perf_result_t(); @@ -186,7 +207,7 @@ class test_uct_perf : public ucs::test { return result; } - ucx_perf_result_t run_multi_threaded(const test_spec &test, const char *hw_name, + ucx_perf_result_t run_multi_threaded(const test_spec &test, const char *dev_name, const char *tl_name) { test_rte_comm c0to1, c1to0; @@ -199,7 +220,7 @@ class test_uct_perf : public ucs::test { params.flags = UCX_PERF_TEST_FLAG_WARMUP; params.message_size = test.msglen; params.alignment = ucs_get_page_size(); - params.max_iter = 400000l; + params.max_iter = test.iters; params.max_time = 0.0; params.report_interval = 1.0; params.rte_group = NULL; @@ -245,8 +266,13 @@ class test_uct_perf : public ucs::test { test_uct_perf::test_spec test_uct_perf::tests[] = { - { "put latency", "usec", UCX_PERF_TEST_CMD_PUT_SHORT, UCX_PERF_TEST_TYPE_PINGPONG, - 8, 1e6, ucs_offsetof(ucx_perf_result_t, latency.total_average) }, + { "put latency", "usec", 0.0, 1.0, + UCX_PERF_TEST_CMD_PUT_SHORT, UCX_PERF_TEST_TYPE_PINGPONG, 8, 100000l, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6 }, + + { "put msgrate", "Mpps", 6.0, 20.0, + UCX_PERF_TEST_CMD_PUT_SHORT, UCX_PERF_TEST_TYPE_STREAM_UNI, 8, 2000000l, + ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6 }, { NULL } }; @@ -256,16 +282,18 @@ UCS_TEST_F(test_uct_perf, envelope) { UCS_TEST_SKIP; } - const char *hw_name = "mlx5_0:1"; - const char *tl_name = "rc_mlx5"; + const char *dev_name = "mlx5_0:1"; + const char *tl_name = "rc_mlx5"; for (test_uct_perf::test_spec *test = tests; test->title != NULL; ++test) { - ucx_perf_result_t result = run_multi_threaded(*test, hw_name, tl_name); - double value = *(double*)( (char*)&result + test->field_offset); + ucx_perf_result_t result = run_multi_threaded(*test, dev_name, tl_name); + double value = *(double*)( (char*)&result + test->field_offset) * test->norm; UCS_TEST_MESSAGE << boost::format("%s/%s %15s : %.3f %s") % tl_name - % hw_name + % dev_name % test->title - % (value * test->norm) + % value % test->units; + EXPECT_GE(value, test->min); + EXPECT_LT(value, test->max); } } diff --git a/test/perf/Makefile.am b/test/perf/Makefile.am index 1b4a3ed8ec9..ba784674162 100644 --- a/test/perf/Makefile.am +++ b/test/perf/Makefile.am @@ -46,11 +46,14 @@ ucx_perftest$(EXEEXT): perftest.o $(ucx_perftest_LDADD) perftest.o $(ucx_perftest_LDFLAGS) $(ucx_perftest_LDADD) -o $@ if HAVE_MPIRUN -.PHONY: test help +.PHONY: ucx test help -MPI_ARGS = -n 2 -map-by node +MPI_ARGS = -n 2 -map-by node -mca pml ob1 -mca btl self,tcp,sm -test: ucx_perftest +ucx: + $(MAKE) -C $(top_builddir) + +test: ucx ucx_perftest $(MPIRUN) $(MPI_ARGS) ucx_perftest$(EXEEXT) $(TEST_ARGS) help: diff --git a/test/perf/libperf.c b/test/perf/libperf.c index 0afe64c44b1..d2df772a55a 100644 --- a/test/perf/libperf.c +++ b/test/perf/libperf.c @@ -124,16 +124,20 @@ static ucs_time_t __find_median_quick_select(ucs_time_t arr[], int n) static ucs_status_t ucx_perf_test_init(ucx_perf_context_t *perf, ucx_perf_test_params_t *params) { - unsigned i; - - perf->params = *params; - - perf->send_buffer = memalign(perf->params.alignment, perf->params.message_size); - perf->recv_buffer = memalign(perf->params.alignment, perf->params.message_size); + perf->send_buffer = memalign(params->alignment, params->message_size); + perf->recv_buffer = memalign(params->alignment, params->message_size); if (perf->send_buffer == NULL || perf->recv_buffer == NULL) { return UCS_ERR_NO_MEMORY; } + return UCS_OK; +} +static void ucx_perf_test_reset(ucx_perf_context_t *perf, + ucx_perf_test_params_t *params) +{ + unsigned i; + + perf->params = *params; perf->start_time = ucs_get_time(); perf->prev_time = perf->start_time; perf->end_time = (perf->params.max_time == 0.0) ? UINT64_MAX : @@ -153,8 +157,6 @@ static ucs_status_t ucx_perf_test_init(ucx_perf_context_t *perf, for (i = 0; i < TIMING_QUEUE_SIZE; ++i) { perf->timing_queue[i] = 0; } - - return UCS_OK; } void ucx_perf_test_cleanup(ucx_perf_context_t *perf) @@ -253,7 +255,27 @@ static inline void ucx_perf_update(ucx_perf_context_t *perf, ucx_perf_counter_t } } -static ucs_status_t ucx_perf_run_put_lat(uct_perf_context_t *perf) +static inline void uct_perf_put_short_b(uct_ep_h ep, void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_perf_context_t *perf) +{ + ucs_status_t status; + + status = uct_ep_put_short(ep, buffer, length, remote_addr, rkey); + while (status == UCS_ERR_WOULD_BLOCK) { + uct_progress(perf->context); + status = uct_ep_put_short(ep, buffer, length, remote_addr, rkey); + } +} + +static void uct_perf_iface_flush_b(uct_perf_context_t *perf) +{ + while (uct_iface_flush(perf->iface, NULL, NULL) == UCS_ERR_WOULD_BLOCK) { + uct_progress(perf->context); + } +} + +static ucs_status_t uct_perf_run_put_short_lat(uct_perf_context_t *perf) { volatile uint8_t *send_sn, *recv_sn; unsigned my_index; @@ -264,6 +286,10 @@ static ucs_status_t ucx_perf_run_put_lat(uct_perf_context_t *perf) uct_rkey_t rkey; uint8_t sn; + if (perf->super.params.message_size < 1) { + return UCS_ERR_INVALID_PARAM; + } + recv_sn = (uint8_t*)perf->super.recv_buffer + perf->super.params.message_size - 1; send_sn = (uint8_t*)perf->super.send_buffer + perf->super.params.message_size - 1; @@ -275,14 +301,15 @@ static ucs_status_t ucx_perf_run_put_lat(uct_perf_context_t *perf) buffer = perf->super.send_buffer; length = perf->super.params.message_size; - sn = 0; + *send_sn = sn = 0; if (my_index == 0) { ep = perf->peers[1].ep; remote_addr = perf->peers[1].remote_addr; rkey = perf->peers[1].rkey.rkey; UCX_PERF_TEST_FOREACH(&perf->super) { while (*recv_sn != sn); - uct_ep_put_short(ep, buffer,length, remote_addr, rkey, NULL, NULL); + uct_perf_put_short_b(ep, buffer, length, remote_addr, rkey, perf); + uct_progress(perf->context); *send_sn = ++sn; ucx_perf_update(&perf->super, 1, length); } @@ -291,7 +318,8 @@ static ucs_status_t ucx_perf_run_put_lat(uct_perf_context_t *perf) remote_addr = perf->peers[0].remote_addr; rkey = perf->peers[0].rkey.rkey; UCX_PERF_TEST_FOREACH(&perf->super) { - uct_ep_put_short(ep, buffer,length, remote_addr, rkey, NULL, NULL); + uct_perf_put_short_b(ep, buffer,length, remote_addr, rkey, perf); + uct_progress(perf->context); while (*recv_sn != sn); *send_sn = ++sn; ucx_perf_update(&perf->super, 1, length); @@ -300,6 +328,45 @@ static ucs_status_t ucx_perf_run_put_lat(uct_perf_context_t *perf) return UCS_OK; } +static ucs_status_t uct_perf_run_put_short_bw(uct_perf_context_t *perf) +{ + unsigned long remote_addr; + uct_rkey_t rkey; + void *buffer; + volatile uint8_t *ptr; + unsigned length; + uct_ep_h ep; + + if (perf->super.params.message_size < 1) { + return UCS_ERR_INVALID_PARAM; + } + + *(uint8_t*)perf->super.recv_buffer = 0; + + rte_call(&perf->super, barrier); + + if (rte_call(&perf->super, group_index) == 0) { + ep = perf->peers[1].ep; + buffer = perf->super.send_buffer; + length = perf->super.params.message_size; + remote_addr = perf->peers[1].remote_addr; + rkey = perf->peers[1].rkey.rkey; + *(uint8_t*)buffer = 1; + + UCX_PERF_TEST_FOREACH(&perf->super) { + uct_perf_put_short_b(ep, buffer, length, remote_addr, rkey, perf); + ucx_perf_update(&perf->super, 1, length); + } + + *(uint8_t*)buffer = 2; + uct_perf_put_short_b(ep, buffer, length, remote_addr, rkey, perf); + } else { + ptr = (uint8_t*)perf->super.recv_buffer; + while (*ptr != 2); + } + return UCS_OK; +} + ucs_status_t uct_perf_test_setup_endpoints(uct_perf_context_t *perf) { unsigned group_size, i; @@ -422,21 +489,46 @@ void uct_perf_test_cleanup_endpoints(uct_perf_context_t *perf) free(perf->peers); } +static ucs_status_t uct_perf_test_dispatch(uct_perf_context_t *perf) +{ + ucs_status_t status; + + if (perf->super.params.command == UCX_PERF_TEST_CMD_PUT_SHORT && + perf->super.params.test_type == UCX_PERF_TEST_TYPE_PINGPONG && + perf->super.params.data_layout == UCX_PERF_DATA_LAYOUT_BUFFER) + { + status = uct_perf_run_put_short_lat(perf); + } else if (perf->super.params.command == UCX_PERF_TEST_CMD_PUT_SHORT && + perf->super.params.test_type == UCX_PERF_TEST_TYPE_STREAM_UNI && + perf->super.params.data_layout == UCX_PERF_DATA_LAYOUT_BUFFER) + { + status = uct_perf_run_put_short_bw(perf); + } else { + return UCS_ERR_INVALID_PARAM; + } + + uct_perf_iface_flush_b(perf); + rte_call(&perf->super, barrier); + return status; +} + ucs_status_t uct_perf_test_run(uct_context_h context, - ucx_perf_test_params_t *params, const char *hw_name, + ucx_perf_test_params_t *params, const char *dev_name, const char *tl_name, ucx_perf_result_t *result) { uct_perf_context_t perf; ucs_status_t status; + perf.context = context; + status = ucx_perf_test_init(&perf.super, params); if (status != UCS_OK) { goto out; } - perf.context = context; + ucx_perf_test_reset(&perf.super, params); - status = uct_iface_open(perf.context, tl_name, hw_name, &perf.iface); + status = uct_iface_open(perf.context, tl_name, dev_name, &perf.iface); if (status != UCS_OK) { goto out_test_cleanup; } @@ -452,16 +544,15 @@ ucs_status_t uct_perf_test_run(uct_context_h context, goto out_mem_unmap; } - /* Run test */ - if (perf.super.params.command == UCX_PERF_TEST_CMD_PUT_SHORT && - perf.super.params.test_type == UCX_PERF_TEST_TYPE_PINGPONG && - perf.super.params.data_layout == UCX_PERF_DATA_LAYOUT_BUFFER) - { - status = ucx_perf_run_put_lat(&perf); - } else { - status = UCS_ERR_INVALID_PARAM; + if (perf.super.params.flags & UCX_PERF_TEST_FLAG_WARMUP) { + perf.super.params.max_iter = 1000; + perf.super.params.report_interval = 1e10; + uct_perf_test_dispatch(&perf); + ucx_perf_test_reset(&perf.super, params); } + /* Run test */ + status = uct_perf_test_dispatch(&perf); if (status == UCS_OK) { ucx_perf_calc_result(&perf.super, result); } diff --git a/test/perf/libperf.h b/test/perf/libperf.h index 3f000544e40..17d896de61b 100644 --- a/test/perf/libperf.h +++ b/test/perf/libperf.h @@ -124,7 +124,7 @@ typedef struct ucx_perf_test_params { * Run a performance test. */ ucs_status_t uct_perf_test_run(uct_context_h context, - ucx_perf_test_params_t *params, const char *hw_name, + ucx_perf_test_params_t *params, const char *dev_name, const char *tl_name, ucx_perf_result_t *result); diff --git a/test/perf/perftest.c b/test/perf/perftest.c index e76debb51ba..4b76aa9ae01 100644 --- a/test/perf/perftest.c +++ b/test/perf/perftest.c @@ -49,7 +49,7 @@ typedef struct sock_rte_group { struct perftest_context { ucx_perf_test_params_t params; - char hw_name[UCT_MAX_NAME_LEN]; + char dev_name[UCT_MAX_NAME_LEN]; char tl_name[UCT_MAX_NAME_LEN]; uct_context_h ucth; @@ -179,6 +179,7 @@ static void usage(struct perftest_context *ctx, const char *program) printf(" -x Transport to use for testing.\n"); printf(" -t Test to run:\n"); printf(" put_lat : put latency.\n"); + printf(" put_bw : put bandwidth / message rate.\n"); printf(" -n Number of iterations to run. (%ld)\n", ctx->params.max_iter); printf(" -s Message size. (%Zu)\n", ctx->params.message_size); printf(" -N Use numeric formatting - thousands separator.\n"); @@ -212,7 +213,7 @@ void print_transports(struct perftest_context *ctx) for (res = resources; res < resources + num_resources; ++res) { printf("| %-9s | %-11s | %10.2f MB/s | %7.3f usec |\n", - res->hw_name, res->tl_name, + res->dev_name, res->tl_name, res->bandwidth / (1024.0 * 1024.0), res->latency / 1000.0); } @@ -235,7 +236,7 @@ static ucs_status_t parse_opts(struct perftest_context *ctx, int argc, char **ar ctx->params.max_iter = 1000000l; ctx->params.max_time = 0.0; ctx->params.report_interval = 1.0; - strcpy(ctx->hw_name, ""); + strcpy(ctx->dev_name, ""); strcpy(ctx->tl_name, ""); ctx->server_addr = NULL; ctx->port = 13337; @@ -247,7 +248,7 @@ static ucs_status_t parse_opts(struct perftest_context *ctx, int argc, char **ar ctx->port = atoi(optarg); break; case 'd': - ucs_snprintf_zero(ctx->hw_name, sizeof(ctx->hw_name), "%s", optarg); + ucs_snprintf_zero(ctx->dev_name, sizeof(ctx->dev_name), "%s", optarg); break; case 'x': ucs_snprintf_zero(ctx->tl_name, sizeof(ctx->tl_name), "%s", optarg); @@ -256,6 +257,9 @@ static ucs_status_t parse_opts(struct perftest_context *ctx, int argc, char **ar if (0 == strcmp(optarg, "put_lat")) { ctx->params.command = UCX_PERF_TEST_CMD_PUT_SHORT; ctx->params.test_type = UCX_PERF_TEST_TYPE_PINGPONG; + } else if (0 == strcmp(optarg, "put_bw")) { + ctx->params.command = UCX_PERF_TEST_CMD_PUT_SHORT; + ctx->params.test_type = UCX_PERF_TEST_TYPE_STREAM_UNI; } else { ucs_error("Invalid option argument for -t"); return -1; @@ -300,7 +304,7 @@ static ucs_status_t validate_params(struct perftest_context *ctx) return UCS_ERR_INVALID_PARAM; } - if (!strlen(ctx->hw_name)) { + if (!strlen(ctx->dev_name)) { ucs_error("Must specify device name"); return UCS_ERR_INVALID_PARAM; } @@ -445,7 +449,7 @@ ucs_status_t setup_sock_rte(struct perftest_context *ctx) close(sockfd); safe_recv(connfd, &ctx->params, sizeof(ctx->params)); - safe_recv(connfd, &ctx->hw_name, sizeof(ctx->hw_name)); + safe_recv(connfd, &ctx->dev_name, sizeof(ctx->dev_name)); safe_recv(connfd, &ctx->tl_name, sizeof(ctx->tl_name)); ctx->sock_rte_group.connfd = connfd; @@ -479,7 +483,7 @@ ucs_status_t setup_sock_rte(struct perftest_context *ctx) } safe_send(sockfd, &ctx->params, sizeof(ctx->params)); - safe_send(sockfd, &ctx->hw_name, sizeof(ctx->hw_name)); + safe_send(sockfd, &ctx->dev_name, sizeof(ctx->dev_name)); safe_send(sockfd, &ctx->tl_name, sizeof(ctx->tl_name)); ctx->sock_rte_group.connfd = sockfd; @@ -641,7 +645,7 @@ static ucs_status_t run_test(struct perftest_context *ctx) setlocale(LC_ALL, "en_US"); print_header(ctx); - status = uct_perf_test_run(ctx->ucth, &ctx->params, ctx->hw_name, ctx->tl_name, + status = uct_perf_test_run(ctx->ucth, &ctx->params, ctx->dev_name, ctx->tl_name, &result); if (status != UCS_OK) { ucs_error("Failed to run test: %s", ucs_status_string(status)); diff --git a/test/test.c b/test/test.c deleted file mode 100644 index 43e4c283868..00000000000 --- a/test/test.c +++ /dev/null @@ -1,26 +0,0 @@ -/** -* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. -* -* $COPYRIGHT$ -* $HEADER$ -*/ - -#include -#include - - -int main(int argc, char **argv) -{ - ucs_status_t status; - uct_context_h context; - - status = uct_init(&context); - if (status != UCS_SUCCESS) { - fprintf(stderr, "Initialization failed\n"); - return -1; - } - - uct_cleanup(context); - return 0; -} - From b953b3d7ccb8311489f4a5250795dead8703e9f9 Mon Sep 17 00:00:00 2001 From: yosefe Date: Tue, 18 Nov 2014 16:43:25 +0200 Subject: [PATCH 4/4] TEST: Fix using uct_iface_flush in test. --- test/gtest/uct/test_uct_context.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/gtest/uct/test_uct_context.cc b/test/gtest/uct/test_uct_context.cc index 0c75bd05b1c..f033675661d 100644 --- a/test/gtest/uct/test_uct_context.cc +++ b/test/gtest/uct/test_uct_context.cc @@ -157,7 +157,10 @@ class entity { void flush() { ucs_status_t status; - status = uct_iface_flush(m_iface, NULL, NULL); + do { + uct_progress(m_ucth); + status = uct_iface_flush(m_iface, NULL, NULL); + } while (status == UCS_ERR_WOULD_BLOCK); ASSERT_UCS_OK(status); }