Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IB/TEST: Implement put_short() for message rate. #26

Merged
merged 4 commits into from
Nov 21, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ if [ -z "$BUILD_NUMBER" ]; then
echo Running interactive
WORKSPACE=$PWD
BUILD_NUMBER=1
JOB_URL=file://$WORKSPACE
WS_URL=file://$WORKSPACE
else
echo Running under jenkins
JOB_URL=$JOB_URL/ws
WS_URL=$JOB_URL/ws
fi

rpm_topdir=$WORKSPACE/rpm-dist
Expand Down Expand Up @@ -57,7 +57,7 @@ nerrors=$(cov-analyze --dir $cov_build |grep "Defect occurrences found" | awk '{
cov-format-errors --dir $cov_build
rc=$(($rc+$nerrors))

cov_url="$JOB_URL/$cov_build_id/c/output/errors/index.html"
cov_url="$WS_URL/$cov_build_id/c/output/errors/index.html"
rm -f jenkins_sidelinks.txt
echo 1..1 > coverity.tap
if [ $nerrors -gt 0 ]; then
Expand Down
8 changes: 4 additions & 4 deletions src/ucs/stats/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -447,19 +447,19 @@ int ucs_stats_is_active()

#else

void mxm_stats_init()
void ucs_stats_init()
{
}

void mxm_stats_cleanup()
void ucs_stats_cleanup()
{
}

void mxm_stats_dump()
void ucs_stats_dump()
{
}

int mxm_stats_is_active()
int ucs_stats_is_active()
{
return 0;
}
Expand Down
1 change: 1 addition & 0 deletions src/uct/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ endif

if HAVE_TL_RC
noinst_HEADERS += \
ib/rc/rc_def.h \
ib/rc/rc_ep.h \
ib/rc/rc_iface.h

Expand Down
17 changes: 12 additions & 5 deletions src/uct/api/tl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
* Communication resource.
*/
typedef struct uct_resource_desc {
char tl_name[UCT_MAX_NAME_LEN]; /* Transport name */
char hw_name[UCT_MAX_NAME_LEN]; /* Hardware resource name */
char tl_name[UCT_MAX_NAME_LEN]; /* Transport name */
char dev_name[UCT_MAX_NAME_LEN]; /* Hardware device name */
uint64_t latency; /* Latency, nanoseconds */
size_t bandwidth; /* Bandwidth, bytes/second */
cpu_set_t local_cpus; /* Mask of CPUs near the resource */
Expand All @@ -48,6 +48,12 @@ struct uct_ep_addr {
typedef void (*uct_completion_cb_t)(uct_req_h req, ucs_status_t status);


/**
 * Callback object.
 *
 * Holds a single function pointer, @a cb, which receives a pointer to the
 * callback object itself together with a status code.
 * NOTE(review): presumably meant to be embedded in a larger user structure so
 * the handler can recover its context from @a self - confirm with callers.
 */
typedef struct uct_callback {
    void (*cb)(struct uct_callback *self, ucs_status_t status);
} uct_callback_t;


/**
* Remote key release function.
*/
Expand Down Expand Up @@ -92,7 +98,7 @@ typedef struct uct_tl_ops {
uct_resource_desc_t **resources_p,
unsigned *num_resources_p);

ucs_status_t (*iface_open)(uct_context_h context, const char *hw_name,
ucs_status_t (*iface_open)(uct_context_h context, const char *dev_name,
uct_iface_h *iface_p);

ucs_status_t (*rkey_unpack)(uct_context_h context, void *rkey_buffer,
Expand Down Expand Up @@ -146,8 +152,8 @@ typedef struct uct_iface_ops {
uct_ep_addr_t *ep_addr);

ucs_status_t (*ep_put_short)(uct_ep_h ep, void *buffer, unsigned length,
uint64_t remote_addr, uct_rkey_t rkey,
uct_req_h *req_p, uct_completion_cb_t cb);
uint64_t remote_addr, uct_rkey_t rkey);

} uct_iface_ops_t;


Expand All @@ -156,6 +162,7 @@ typedef struct uct_iface_ops {
*/
typedef struct uct_pd {
uct_pd_ops_t *ops;
uct_context_h context;
} uct_pd_t;


Expand Down
18 changes: 13 additions & 5 deletions src/uct/api/uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ ucs_status_t uct_init(uct_context_h *context_p);
void uct_cleanup(uct_context_h context);


/**
* @ingroup CONTEXT
* @brief Progress all communications of the context.
*
* @param [in] context Handle to context.
*/
void uct_progress(uct_context_h context);


/**
* @ingroup CONTEXT
* @brief Query for transport resources.
Expand Down Expand Up @@ -63,13 +72,13 @@ void uct_release_resource_list(uct_resource_desc_t *resources);
*
* @param [in] context Handle to context.
* @param [in] tl_name Transport name.
* @param [in] hw_name Hardware resource name,
* @param [in] dev_name Hardware device name,
* @param [out] iface_p Filled with a handle to opened communication interface.
*
* @return Error code.
*/
ucs_status_t uct_iface_open(uct_context_h context, const char *tl_name,
const char *hw_name, uct_iface_h *iface_p);
const char *dev_name, uct_iface_h *iface_p);


/**
Expand Down Expand Up @@ -169,10 +178,9 @@ static inline ucs_status_t uct_ep_connect_to_ep(uct_ep_h ep, uct_iface_addr_t *i
}

static inline ucs_status_t uct_ep_put_short(uct_ep_h ep, void *buffer, unsigned length,
uint64_t remote_addr, uct_rkey_t rkey,
uct_req_h *req_p, uct_completion_cb_t cb)
uint64_t remote_addr, uct_rkey_t rkey)
{
return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey, req_p, cb);
return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey);
}

#endif
2 changes: 1 addition & 1 deletion src/uct/ib/base/ib_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ ucs_status_t uct_ib_init(uct_context_h context)
/* TODO apply a user-defined regex/wildcard filter */
ibctx->num_devices = 0;
for (i = 0; i < num_devices; ++i) {
status = uct_ib_device_create(device_list[i],
status = uct_ib_device_create(context, device_list[i],
&ibctx->devices[ibctx->num_devices]);
if (status != UCS_OK) {
ucs_warn("Failed to initialize %s (%s), ignoring it",
Expand Down
17 changes: 10 additions & 7 deletions src/uct/ib/base/ib_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ uct_pd_ops_t uct_ib_pd_ops = {
.rkey_pack = uct_ib_rkey_pack,
};

ucs_status_t uct_ib_device_create(struct ibv_device *ibv_device, uct_ib_device_t **dev_p)
ucs_status_t uct_ib_device_create(uct_context_h context,
struct ibv_device *ibv_device,
uct_ib_device_t **dev_p)
{
struct ibv_context *ibv_context;
struct ibv_exp_device_attr dev_attr;
Expand Down Expand Up @@ -178,11 +180,12 @@ ucs_status_t uct_ib_device_create(struct ibv_device *ibv_device, uct_ib_device_t
}

/* Save device information */
dev->super.ops = &uct_ib_pd_ops;
dev->ibv_context = ibv_context;
dev->dev_attr = dev_attr;
dev->first_port = first_port;
dev->num_ports = num_ports;
dev->super.ops = &uct_ib_pd_ops;
dev->super.context = context;
dev->ibv_context = ibv_context;
dev->dev_attr = dev_attr;
dev->first_port = first_port;
dev->num_ports = num_ports;

/* Get device locality */
uct_ib_device_get_affinity(ibv_get_device_name(ibv_device), &dev->local_cpus);
Expand Down Expand Up @@ -281,7 +284,7 @@ ucs_status_t uct_ib_device_port_get_resource(uct_ib_device_t *dev, uint8_t port_
int ret;

/* HCA:Port is the hardware resource name */
ucs_snprintf_zero(resource->hw_name, sizeof(resource->hw_name), "%s:%d",
ucs_snprintf_zero(resource->dev_name, sizeof(resource->dev_name), "%s:%d",
uct_ib_device_name(dev), port_num);

/* Port network address */
Expand Down
6 changes: 5 additions & 1 deletion src/uct/ib/base/ib_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,15 @@ struct uct_ib_device {
uint8_t first_port; /* Number of first port (usually 1) */
uint8_t num_ports; /* Amount of physical ports */
cpu_set_t local_cpus; /* CPUs local to device */
pthread_t async_thread; /* Async event thread */
int stop_thread;
struct ibv_exp_port_attr port_attr[0]; /* Cached port attributes */
};


ucs_status_t uct_ib_device_create(struct ibv_device *ibv_device, uct_ib_device_t **dev_p);
ucs_status_t uct_ib_device_create(uct_context_h context,
struct ibv_device *ibv_device,
uct_ib_device_t **dev_p);
void uct_ib_device_destroy(uct_ib_device_t *dev);


Expand Down
20 changes: 11 additions & 9 deletions src/uct/ib/base/ib_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,27 @@

static ucs_status_t uct_ib_iface_find_port(uct_ib_context_t *ibctx,
uct_ib_iface_t *iface,
const char *hw_name)
const char *dev_name)
{
uct_ib_device_t *dev;
const char *dev_name;
const char *ibdev_name;
unsigned port_num;
unsigned dev_index;
size_t devname_len;
char *p;

p = strrchr(hw_name, ':');
p = strrchr(dev_name, ':');
if (p == NULL) {
return UCS_ERR_INVALID_PARAM; /* Wrong hw_name format */
return UCS_ERR_INVALID_PARAM; /* Wrong dev_name format */
}
devname_len = p - hw_name;
devname_len = p - dev_name;

for (dev_index = 0; dev_index < ibctx->num_devices; ++dev_index) {
dev = ibctx->devices[dev_index];
dev_name = uct_ib_device_name(dev);
if ((strlen(dev_name) == devname_len) && !strncmp(dev_name, hw_name, devname_len)) {
ibdev_name = uct_ib_device_name(dev);
if ((strlen(ibdev_name) == devname_len) &&
!strncmp(ibdev_name, dev_name, devname_len))
{
port_num = strtod(p + 1, &p);
if (*p != '\0') {
return UCS_ERR_INVALID_PARAM; /* Failed to parse port number */
Expand All @@ -54,14 +56,14 @@ static ucs_status_t uct_ib_iface_find_port(uct_ib_context_t *ibctx,
}

ucs_status_t ucs_ib_iface_init(uct_context_h context, uct_ib_iface_t *iface,
const char *hw_name)
const char *dev_name)
{
uct_ib_context_t *ibctx = ucs_component_get(context, ib, uct_ib_context_t);
struct ibv_exp_port_attr *port_attr;
uct_ib_device_t *dev;
ucs_status_t status;

status = uct_ib_iface_find_port(ibctx, iface, hw_name);
status = uct_ib_iface_find_port(ibctx, iface, dev_name);
if (status != UCS_OK) {
goto err;
}
Expand Down
2 changes: 1 addition & 1 deletion src/uct/ib/base/ib_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ typedef struct uct_ib_iface {


ucs_status_t ucs_ib_iface_init(uct_context_h context, uct_ib_iface_t *iface,
const char *hw_name);
const char *dev_name);
void ucs_ib_iface_cleanup(uct_ib_iface_t *iface);

static inline uct_ib_device_t * uct_ib_iface_device(uct_ib_iface_t *iface)
Expand Down
16 changes: 16 additions & 0 deletions src/uct/ib/base/ib_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,29 @@ static inline struct ibv_mr *ibv_exp_reg_mr(struct ibv_exp_reg_mr_in *in)
# define IBV_SHARED_MR_ACCESS_FLAGS(_shared_mr) ((_shared_mr)->exp_access)
# define IBV_EXP_DEVICE_ATTR_SET_COMP_MASK(_attr)
# define IBV_EXP_PORT_ATTR_SET_COMP_MASK(_attr)

/*
 * Stub variant of ibv_exp_cq_ignore_overrun(): performs no operation on the
 * CQ, sets errno to ENOSYS and reports failure.
 * NOTE(review): appears to be the branch used when the experimental verbs CQ
 * attributes are not available - confirm against the enclosing #if.
 *
 * @param cq  Completion queue (not used by this variant).
 *
 * @return Always -1, with errno set to ENOSYS.
 */
static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq)
{
    (void)cq; /* argument intentionally unused */

    errno = ENOSYS;
    return -1;
}

#else
# define IBV_IS_MPAGES_AVAIL(_attr) ((_attr)->device_cap_flags2 & IBV_EXP_DEVICE_MR_ALLOCATE)
# define IBV_DEVICE_HAS_DC(_attr) ((_attr)->exp_device_cap_flags & IBV_EXP_DEVICE_DC_TRANSPORT)
# define IBV_EXP_REG_MR_FLAGS(_f, _e) (_f) , (_e)
# define IBV_SHARED_MR_ACCESS_FLAGS(_shared_mr) ((_shared_mr)->access)
# define IBV_EXP_DEVICE_ATTR_SET_COMP_MASK(_attr) (_attr)->comp_mask = (IBV_EXP_DEVICE_ATTR_RESERVED - 1)
# define IBV_EXP_PORT_ATTR_SET_COMP_MASK(_attr) (_attr)->comp_mask = 0

static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq)
{
struct ibv_exp_cq_attr cq_attr = {0};
cq_attr.comp_mask = IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS;
cq_attr.cq_cap_flags = IBV_EXP_CQ_IGNORE_OVERRUN;
return ibv_exp_modify_cq(cq, &cq_attr, IBV_EXP_CQ_CAP_FLAGS);
}

#endif

#endif
3 changes: 3 additions & 0 deletions src/uct/ib/mlx5/ib_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "ib_mlx5.h"

#include <ucs/debug/log.h>
#include <ucs/sys/compiler.h>
#include <string.h>

Expand All @@ -16,6 +17,8 @@ ucs_status_t uct_ib_mlx5_get_qp_info(struct ibv_qp *qp, uct_ib_mlx5_qp_info_t *q
struct mlx5_qp *mqp = ucs_container_of(qp, struct mlx5_qp, verbs_qp.qp);

if ((mqp->sq.cur_post != 0) || (mqp->rq.head != 0) || mqp->bf->need_lock) {
ucs_warn("cur_post=%d head=%d need_lock=%d", mqp->sq.cur_post,
mqp->rq.head, mqp->bf->need_lock);
return UCS_ERR_NO_DEVICE;
}

Expand Down
10 changes: 10 additions & 0 deletions src/uct/ib/mlx5/ib_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,14 @@ void uct_ib_mlx5_update_cq_ci(struct ibv_cq *cq, unsigned cq_ci);
void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av);


/*
 * Check whether a CQE should be treated as hardware-owned.
 *
 * The entry is reported as hardware-owned when either:
 *  - the upper nibble of op_own equals MLX5_CQE_INVALID, or
 *  - the owner bit (op_own & MLX5_CQE_OWNER_MASK) equals the logical negation
 *    of (index & cq_length).
 * NOTE(review): presumably cq_length is a power of two, so that
 * (index & cq_length) toggles on every wrap of the queue - confirm.
 *
 * @param cqe        Completion queue entry to inspect.
 * @param index      Consumer index of the entry.
 * @param cq_length  Length of the completion queue.
 *
 * @return 1 if the entry is hardware-owned, 0 otherwise.
 */
static inline int uct_ib_mlx5_cqe_hw_owned(struct mlx5_cqe64 *cqe, unsigned index,
                                           unsigned cq_length)
{
    const uint8_t own_byte = cqe->op_own;

    /* Opcode nibble marks the entry as invalid */
    if ((own_byte & 0xF0) == (MLX5_CQE_INVALID << 4)) {
        return 1;
    }

    /* Owner bit compared against the negated wrap bit of the index */
    return (own_byte & MLX5_CQE_OWNER_MASK) == !(index & cq_length);
}


#endif
20 changes: 20 additions & 0 deletions src/uct/ib/rc/rc_def.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
*
* $COPYRIGHT$
* $HEADER$
*/

#ifndef RC_DEF_H_
#define RC_DEF_H_


/* Send queue length requested per RC QP (assigned to cap.max_send_wr when the
 * endpoint QP is created in uct_rc_ep_init). */
#define UCT_RC_TX_QP_LEN 128
/* NOTE(review): presumably the size of the interface's QP-number-to-endpoint
 * lookup table - its use is not visible here, confirm in rc_iface. */
#define UCT_RC_QP_HASH_SIZE 256

/* Forward typedefs for the RC transport types; the struct definitions are not
 * part of this header. */
typedef struct uct_rc_ep uct_rc_ep_t;
typedef struct uct_rc_iface uct_rc_iface_t;
typedef struct uct_rc_ep_addr uct_rc_ep_addr_t;


#endif
7 changes: 6 additions & 1 deletion src/uct/ib/rc/rc_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep)
qp_init_attr.send_cq = iface->super.send_cq;
qp_init_attr.recv_cq = iface->super.recv_cq;
qp_init_attr.srq = NULL; /* TODO */
qp_init_attr.cap.max_send_wr = 1024;
qp_init_attr.cap.max_send_wr = UCT_RC_TX_QP_LEN;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is one of the TODO items that you plan to make tunable in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. So far we have not dealt with the issue of how to pass the configuration to the objects, but I guess we will need to do it soon.

qp_init_attr.cap.max_recv_wr = 1024;
qp_init_attr.cap.max_send_sge = 2;
qp_init_attr.cap.max_recv_sge = 1;
Expand All @@ -40,6 +40,8 @@ ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep)
goto err;
}

ep->qp_num = ep->qp->qp_num;
uct_rc_iface_add_ep(iface, ep);
return UCS_OK;

err:
Expand All @@ -48,8 +50,11 @@ ucs_status_t uct_rc_ep_init(uct_rc_ep_t *ep)

void uct_rc_ep_cleanup(uct_rc_ep_t *ep)
{
uct_rc_iface_t *iface = ucs_derived_of(ep->super.iface, uct_rc_iface_t);
int ret;

uct_rc_iface_remove_ep(iface, ep);

ret = ibv_destroy_qp(ep->qp);
if (ret != 0) {
ucs_warn("ibv_destroy_qp() returned %d: %m", ret);
Expand Down
Loading