Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IB/RC: Initial implementation of RC transport. #19

Merged
merged 4 commits into from
Nov 9, 2014
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

ACLOCAL_AMFLAGS = -I config/m4

SUBDIRS = src/ucs src/uct
SUBDIRS = src/ucs src/uct test/perf

if HAVE_GTEST
SUBDIRS += test/gtest
Expand Down
12 changes: 11 additions & 1 deletion config/m4/ib.m4
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,24 @@ AC_CHECK_HEADER([infiniband/verbs_exp.h],
[verbs_exp=no])


#
# mlx5 PRM
#
with_mlx5_hw=no
AC_CHECK_HEADERS([infiniband/mlx5_hw.h],
[AC_MSG_NOTICE([Compiling with mlx5 bare-metal support])
AC_DEFINE([HAVE_MLX5_HW], 1, [mlx5 bare-metal support])
with_mlx5_hw=yes])


#
# For automake
#
AM_CONDITIONAL([HAVE_IB], [test "x$with_ib" != xno])
AM_CONDITIONAL([HAVE_TL_RC], [test "x$with_rc" != xno])
AM_CONDITIONAL([HAVE_MLX5_HW], [test "x$with_mlx5_hw" != xno])

mlnx_valg_libdir=/usr/lib64/mlnx_ofed/valgrind
AS_IF([test -d "$mlnx_valg_libdir"],
[AC_MSG_NOTICE([Added $mlnx_valg_libdir to valgrind LD_LIBRARY_PATH])
valgrind_libpath="$mlnx_valg_libdir:$valgrind_libpath"])

1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ AC_CONFIG_FILES([
src/uct/Makefile
src/uct/api/version.h
test/gtest/Makefile
test/perf/Makefile
])

AC_OUTPUT
24 changes: 20 additions & 4 deletions src/uct/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ noinst_HEADERS = \
tl/context.h

libuct_la_SOURCES = \
tl/context.c \
tl/tl.c
tl/context.c

if HAVE_IB
noinst_HEADERS += \
Expand All @@ -36,13 +35,30 @@ libuct_la_SOURCES += \
ib/base/ib_context.c \
ib/base/ib_device.c \
ib/base/ib_iface.c

if HAVE_MLX5_HW
noinst_HEADERS += \
ib/mlx5/ib_mlx5.h

libuct_la_SOURCES += \
ib/mlx5/ib_mlx5.c
endif

endif

if HAVE_TL_RC
noinst_HEADERS += \
ib/rc/rc_ep.h \
ib/rc/rc_iface.h

libuct_la_SOURCES += \
ib/rc/rc_iface.c \
ib/rc/rc_tl.c
ib/rc/rc_ep.c \
ib/rc/rc_iface.c

if HAVE_MLX5_HW
libuct_la_SOURCES += \
ib/rc/rc_mlx5.c \
ib/rc/rc_mlx5.h
endif

endif
121 changes: 87 additions & 34 deletions src/uct/api/tl.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,25 @@


/**
* Communication interface context
* Communication resource.
*/
typedef struct uct_iface {
uct_tl_ops_t *ops;
} uct_iface_t;
typedef struct uct_resource_desc {
char tl_name[UCT_MAX_NAME_LEN]; /* Transport name */
char hw_name[UCT_MAX_NAME_LEN]; /* Hardware resource name */
uint64_t latency; /* Latency, nanoseconds */
size_t bandwidth; /* Bandwidth, bytes/second */
cpu_set_t local_cpus; /* Mask of CPUs near the resource */
socklen_t addrlen; /* Size of address */
struct sockaddr_storage subnet_addr; /* Subnet address. Devices which can
reach each other have the same address */
} uct_resource_desc_t;


/**
* Remote endpoint
*/
typedef struct uct_ep {
uct_tl_ops_t *ops;
} uct_ep_t;
struct uct_iface_addr {
};

struct uct_ep_addr {
};


/**
Expand All @@ -56,54 +62,101 @@ typedef struct uct_iface_attr {


/**
* Communication resource.
* Protection domain attributes
*/
typedef struct uct_resource_desc {
char tl_name[UCT_MAX_NAME_LEN]; /* Transport name */
char hw_name[UCT_MAX_NAME_LEN]; /* Hardware resource name */
uint64_t latency; /* Latency, nanoseconds */
size_t bandwidth; /* Bandwidth, bytes/second */
cpu_set_t local_cpus; /* Mask of CPUs near the resource */
socklen_t addrlen; /* Size of address */
struct sockaddr_storage subnet_addr; /* Subnet address. Devices which can
reach each other have same address */
} uct_resource_desc_t;
typedef struct uct_pd_attr {
size_t rkey_packed_size; /* Size of buffer needed for packed rkey
(presumably the rkey_buffer given to
rkey_pack/rkey_unpack - TODO confirm) */
} uct_pd_attr_t;


/**
* Transport operations.
* Transport "global" operations
*/
struct uct_tl_ops {
typedef struct uct_tl_ops {

ucs_status_t (*query_resources)(uct_context_h context,
uct_resource_desc_t **resources_p,
unsigned *num_resources_p);

ucs_status_t (*iface_open)(uct_context_h context, const char *hw_name,
uct_iface_h *iface_p);
void (*iface_close)(uct_iface_h iface);
} uct_tl_ops_t;


/**
* Transport memory operations
*/
typedef struct uct_pd_ops {
ucs_status_t (*query)(uct_pd_h pd, uct_pd_attr_t *pd_attr);

ucs_status_t (*mem_map)(uct_pd_h pd, void *address, size_t length,
uct_lkey_t *lkey_p);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Who is providing the RKEY? I think it has to come out of the registration function. Am I right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it comes only from rkey_unpack
user does rkey_pack(lkey) -> OOB -> rkey_unpack

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I want to use it locally, do I really want to go through the pack/unpack flow? Shall we provide direct access to the rkey and then have this pack/unpack option?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only reason i see you would want to use it locally is for tests over loopback. and in that case we do go through pack/unpack. i see no reason adding another API just for that.
also, forcing the user to do pack/unpack prevents the error of just sending the rkey "as-is" to the remote peer.


ucs_status_t (*mem_unmap)(uct_pd_h pd, uct_lkey_t lkey);

/* TODO support "mem attach", MPI-3 style */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it covers every possible case including mem attach

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i considered something like passing rkey to mem_map and it would behave like shmem_ptr(). it's also a nice wrapper for ibv_attached_shared_mr.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok


ucs_status_t (*rkey_pack)(uct_pd_h pd, uct_lkey_t lkey, void *rkey_buffer);

ucs_status_t (*rkey_unpack)(uct_pd_h pd, void *rkey_buffer, uct_rkey_t *rkey_p);

void (*rkey_release)(uct_pd_h pd, uct_rkey_t rkey);
} uct_pd_ops_t;


/**
* Transport iface operations.
*/
typedef struct uct_iface_ops {

ucs_status_t (*iface_query)(uct_iface_h iface,
uct_iface_attr_t *iface_attr);
ucs_status_t (*iface_get_address)(uct_iface_h iface,
uct_iface_addr_t *iface_addr);

ucs_status_t (*ep_create)(uct_ep_h *ep_p);
ucs_status_t (*iface_flush)(uct_iface_h iface, uct_req_h *req_p,
uct_completion_cb_t cb);

void (*iface_close)(uct_iface_h iface);

ucs_status_t (*ep_create)(uct_iface_h iface, uct_ep_h *ep_p);
void (*ep_destroy)(uct_ep_h ep);

ucs_status_t (*ep_get_address)(uct_ep_h *ep,
ucs_status_t (*ep_get_address)(uct_ep_h ep,
uct_ep_addr_t *ep_addr);
ucs_status_t (*ep_connect_to_iface)(uct_ep_h ep, uct_iface_addr_t *iface_addr);
ucs_status_t (*ep_connect_to_ep)(uct_ep_h ep, uct_iface_addr_t *iface_addr,
uct_ep_addr_t *ep_addr);
ucs_status_t (*ep_connect_to_iface)(uct_iface_addr_t *iface_addr);
ucs_status_t (*ep_connect_to_ep)(uct_iface_addr_t *iface_addr,
uct_ep_addr_t *ep_addr);

ucs_status_t (*ep_put_short)(uct_ep_h ep, void *buffer, unsigned length,
uct_rkey_t rkey, uct_req_h *req_p,
uct_completion_cb_t cb);
uint64_t remote_addr, uct_rkey_t rkey,
uct_req_h *req_p, uct_completion_cb_t cb);
} uct_iface_ops_t;

ucs_status_t (*iface_flush)(uct_iface_h iface, uct_req_h *req_p,
uct_completion_cb_t cb);
};

/**
* Protection domain
*/
typedef struct uct_pd {
uct_pd_ops_t *ops; /* Memory operations table; the uct_pd_* wrappers dispatch through it */
} uct_pd_t;


/**
* Communication interface context
*/
typedef struct uct_iface {
uct_iface_ops_t ops; /* Interface and endpoint operations table, embedded by value */
uct_pd_h pd; /* Protection domain handle */
} uct_iface_t;


/**
* Remote endpoint
*/
typedef struct uct_ep {
uct_iface_h iface; /* Interface whose operations table serves this endpoint's calls */
} uct_ep_t;


#endif
94 changes: 87 additions & 7 deletions src/uct/api/uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ void uct_release_resource_list(uct_resource_desc_t *resources);


/**
* @ingroup CONTEXT
* @brief Open a communication interface.
*
* @param [in] context Handle to context.
Expand All @@ -71,12 +72,91 @@ ucs_status_t uct_iface_open(uct_context_h context, const char *tl_name,
const char *hw_name, uct_iface_h *iface_p);


/**
* @brief Close a communication interface.
*
* @param [in] iface Interface to close.
*/
void uct_iface_close(uct_iface_h iface);

/**
 * @brief Query protection domain attributes.
 *
 * Dispatches to the query entry of the protection domain's operations table.
 *
 * @param [in] pd       Protection domain handle.
 * @param      pd_attr  Attributes structure, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_pd_query(uct_pd_h pd, uct_pd_attr_t *pd_attr)
{
    ucs_status_t status;

    status = (*pd->ops->query)(pd, pd_attr);
    return status;
}

/**
 * @brief Map a memory range through the protection domain.
 *
 * Dispatches to the mem_map entry of the protection domain's operations table.
 *
 * @param [in] pd       Protection domain handle.
 * @param [in] address  Start of the memory range.
 * @param [in] length   Length of the memory range.
 * @param      lkey_p   Local key location, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_pd_mem_map(uct_pd_h pd, void *address,
                                          size_t length, uct_lkey_t *lkey_p)
{
    ucs_status_t status;

    status = (*pd->ops->mem_map)(pd, address, length, lkey_p);
    return status;
}

/**
 * @brief Unmap memory identified by a local key.
 *
 * Dispatches to the mem_unmap entry of the protection domain's operations
 * table.
 *
 * @param [in] pd    Protection domain handle.
 * @param [in] lkey  Local key, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_pd_mem_unmap(uct_pd_h pd, uct_lkey_t lkey)
{
    ucs_status_t status;

    status = (*pd->ops->mem_unmap)(pd, lkey);
    return status;
}

/**
 * @brief Pack a remote key into a buffer.
 *
 * Dispatches to the rkey_pack entry of the protection domain's operations
 * table.
 *
 * @param [in] pd           Protection domain handle.
 * @param [in] lkey         Local key, forwarded to the operation.
 * @param      rkey_buffer  Buffer for the packed key, forwarded to the
 *                          operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_pd_rkey_pack(uct_pd_h pd, uct_lkey_t lkey,
                                            void *rkey_buffer)
{
    ucs_status_t status;

    status = (*pd->ops->rkey_pack)(pd, lkey, rkey_buffer);
    return status;
}

/**
 * @brief Unpack a remote key from a buffer.
 *
 * Dispatches to the rkey_unpack entry of the protection domain's operations
 * table.
 *
 * @param [in] pd           Protection domain handle.
 * @param      rkey_buffer  Buffer holding the packed key, forwarded to the
 *                          operation.
 * @param      rkey_p       Remote key location, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_pd_rkey_unpack(uct_pd_h pd, void *rkey_buffer,
                                              uct_rkey_t *rkey_p)
{
    ucs_status_t status;

    status = (*pd->ops->rkey_unpack)(pd, rkey_buffer, rkey_p);
    return status;
}

/**
 * @brief Release a remote key.
 *
 * Dispatches to the rkey_release entry of the protection domain's operations
 * table.
 *
 * @param [in] pd    Protection domain handle.
 * @param [in] rkey  Remote key, forwarded to the operation.
 */
static inline void uct_pd_rkey_release(uct_pd_h pd, uct_rkey_t rkey)
{
    (*pd->ops->rkey_release)(pd, rkey);
}

/**
 * @brief Query interface attributes.
 *
 * Dispatches to the iface_query entry of the interface's operations table.
 *
 * @param [in] iface       Interface handle.
 * @param      iface_attr  Attributes structure, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_iface_query(uct_iface_h iface,
                                           uct_iface_attr_t *iface_attr)
{
    ucs_status_t status;

    status = (*iface->ops.iface_query)(iface, iface_attr);
    return status;
}

/**
 * @brief Get the address of an interface.
 *
 * Dispatches to the iface_get_address entry of the interface's operations
 * table.
 *
 * @param [in] iface       Interface handle.
 * @param      iface_addr  Interface address structure, forwarded to the
 *                         operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_iface_get_address(uct_iface_h iface,
                                                 uct_iface_addr_t *iface_addr)
{
    ucs_status_t status;

    status = (*iface->ops.iface_get_address)(iface, iface_addr);
    return status;
}

/**
 * @brief Flush an interface.
 *
 * Dispatches to the iface_flush entry of the interface's operations table.
 *
 * @param [in] iface  Interface handle.
 * @param      req_p  Request handle location, forwarded to the operation.
 * @param [in] cb     Completion callback, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_iface_flush(uct_iface_h iface, uct_req_h *req_p,
                                           uct_completion_cb_t cb)
{
    ucs_status_t status;

    status = (*iface->ops.iface_flush)(iface, req_p, cb);
    return status;
}

/**
 * @brief Close a communication interface.
 *
 * Dispatches to the iface_close entry of the interface's operations table.
 *
 * @param [in] iface  Interface to close.
 */
static inline void uct_iface_close(uct_iface_h iface)
{
    (*iface->ops.iface_close)(iface);
}

/**
 * @brief Create an endpoint on an interface.
 *
 * Dispatches to the ep_create entry of the interface's operations table.
 *
 * @param [in] iface  Interface handle.
 * @param      ep_p   Endpoint handle location, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_ep_create(uct_iface_h iface, uct_ep_h *ep_p)
{
    ucs_status_t status;

    status = (*iface->ops.ep_create)(iface, ep_p);
    return status;
}

/**
 * @brief Destroy an endpoint.
 *
 * Dispatches to the ep_destroy entry in the operations table of the
 * endpoint's interface.
 *
 * @param [in] ep  Endpoint handle.
 */
static inline void uct_ep_destroy(uct_ep_h ep)
{
    (*ep->iface->ops.ep_destroy)(ep);
}

/**
 * @brief Get the address of an endpoint.
 *
 * Dispatches to the ep_get_address entry in the operations table of the
 * endpoint's interface.
 *
 * @param [in] ep       Endpoint handle.
 * @param      ep_addr  Endpoint address structure, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_ep_get_address(uct_ep_h ep,
                                              uct_ep_addr_t *ep_addr)
{
    ucs_status_t status;

    status = (*ep->iface->ops.ep_get_address)(ep, ep_addr);
    return status;
}

/**
 * @brief Connect an endpoint to an interface address.
 *
 * Dispatches to the ep_connect_to_iface entry in the operations table of the
 * endpoint's interface.
 *
 * @param [in] ep          Endpoint handle.
 * @param      iface_addr  Interface address, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_ep_connect_to_iface(uct_ep_h ep,
                                                   uct_iface_addr_t *iface_addr)
{
    ucs_status_t status;

    status = (*ep->iface->ops.ep_connect_to_iface)(ep, iface_addr);
    return status;
}

/**
 * @brief Connect an endpoint to an interface address and endpoint address.
 *
 * Dispatches to the ep_connect_to_ep entry in the operations table of the
 * endpoint's interface.
 *
 * @param [in] ep          Endpoint handle.
 * @param      iface_addr  Interface address, forwarded to the operation.
 * @param      ep_addr     Endpoint address, forwarded to the operation.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_ep_connect_to_ep(uct_ep_h ep,
                                                uct_iface_addr_t *iface_addr,
                                                uct_ep_addr_t *ep_addr)
{
    ucs_status_t status;

    status = (*ep->iface->ops.ep_connect_to_ep)(ep, iface_addr, ep_addr);
    return status;
}

/**
 * @brief Issue a short put operation on an endpoint.
 *
 * Dispatches to the ep_put_short entry in the operations table of the
 * endpoint's interface. All arguments are forwarded unchanged.
 *
 * @param [in] ep           Endpoint handle.
 * @param [in] buffer       Local buffer.
 * @param [in] length       Length of the buffer.
 * @param [in] remote_addr  Remote address.
 * @param [in] rkey         Remote key.
 * @param      req_p        Request handle location.
 * @param [in] cb           Completion callback.
 *
 * @return Status returned by the underlying operation.
 */
static inline ucs_status_t uct_ep_put_short(uct_ep_h ep, void *buffer,
                                            unsigned length,
                                            uint64_t remote_addr,
                                            uct_rkey_t rkey, uct_req_h *req_p,
                                            uct_completion_cb_t cb)
{
    ucs_status_t status;

    status = (*ep->iface->ops.ep_put_short)(ep, buffer, length, remote_addr,
                                            rkey, req_p, cb);
    return status;
}

#endif
8 changes: 4 additions & 4 deletions src/uct/api/uct_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ typedef struct uct_iface *uct_iface_h;
typedef struct uct_iface_addr uct_iface_addr_t;
typedef struct uct_ep *uct_ep_h;
typedef struct uct_ep_addr uct_ep_addr_t;
typedef struct uct_tl_ops uct_tl_ops_t;
typedef uint64_t uct_lkey_t;
typedef uint64_t uct_rkey_t;
typedef uintptr_t uct_lkey_t;
typedef uintptr_t uct_rkey_t;
typedef struct uct_req *uct_req_h;

typedef struct uct_memory_region *uct_memory_region_h;
typedef struct uct_pd *uct_pd_h;

#endif
11 changes: 5 additions & 6 deletions src/uct/ib/base/ib_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,11 @@ ucs_status_t uct_ib_query_resources(uct_context_h context, unsigned flags,

static void uct_ib_register_tls(uct_context_t *context)
{
#if HAVE_MLX5_HW
#if HAVE_TL_RC
extern uct_tl_ops_t uct_rc_tl_ops;
uct_register_tl(context, "rc", &uct_rc_tl_ops);
extern uct_tl_ops_t uct_rc_mlx5_tl_ops;
uct_register_tl(context, "rc_mlx5", &uct_rc_mlx5_tl_ops);
#endif
#endif
}

Expand Down Expand Up @@ -122,11 +124,8 @@ ucs_status_t uct_ib_init(uct_context_h context)
if (ibctx->num_devices > 0) {
ucs_debug("initialized IB component with %u devices", ibctx->num_devices);
uct_ib_register_tls(context);
status = UCS_OK;
} else {
ucs_free(ibctx->devices);
status = UCS_ERR_NO_DEVICE;
}
status = UCS_OK;

out_free_device_list:
ibv_free_device_list(device_list);
Expand Down
Loading