Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

a new branch fixed RoCE issues for recv/send example #10

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
CC=gcc
CFLAGS=-Wall -Werror -O2
INCLUDES=
LDFLAGS=
LIBS=-pthread -lrdmacm
LDFLAGS=-libverbs
LIBS=-pthread

SRCS=main.c client.c config.c ib.c server.c setup_ib.c sock.c
OBJS=$(SRCS:.c=.o)
Expand Down
2 changes: 2 additions & 0 deletions config.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ void print_config_info ()
log ("msg_size = %d", config_info.msg_size);
log ("num_concurr_msgs = %d", config_info.num_concurr_msgs);
log ("sock_port = %s", config_info.sock_port);
log ("dev_index = %hhu", config_info.dev_index);
log ("gid_index = %hhu", config_info.gid_index);
if (config_info.is_server == false) {
log ("server_name = %s", config_info.server_name);
}
Expand Down
2 changes: 2 additions & 0 deletions config.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ struct ConfigInfo {

int msg_size; /* the size of each echo message */
int num_concurr_msgs; /* the number of messages can be sent concurrently */
int gid_index; /* GID index, as listed by ibv_devinfo -v (e.g. -d mlx5_4) */
int dev_index; /* device index, as listed by ibv_devinfo */

char *sock_port; /* socket port number */
char *server_name; /* server name */
Expand Down
22 changes: 17 additions & 5 deletions ib.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "ib.h"
#include "debug.h"

int modify_qp_to_rts (struct ibv_qp *qp, uint32_t target_qp_num, uint16_t target_lid)
int modify_qp_to_rts (struct ibv_qp *qp, struct QPInfo *local, struct QPInfo *remote)
{
int ret = 0;

Expand All @@ -31,22 +31,34 @@ int modify_qp_to_rts (struct ibv_qp *qp, uint32_t target_qp_num, uint16_t target
struct ibv_qp_attr qp_attr = {
.qp_state = IBV_QPS_RTR,
.path_mtu = IB_MTU,
.dest_qp_num = target_qp_num,
.dest_qp_num = remote->qp_num,
.rq_psn = 0,
.max_dest_rd_atomic = 1,
.min_rnr_timer = 12,
.ah_attr.is_global = 0,
.ah_attr.dlid = target_lid,
.ah_attr.dlid = remote->lid,
.ah_attr.sl = IB_SL,
.ah_attr.src_path_bits = 0,
.ah_attr.port_num = IB_PORT,
};

printf("qp: %u ==> %u, lid: %hu\n", local->qp_num, remote->qp_num, remote->lid);

if (remote->lid == 0) {
printf("using gid\n");
qp_attr.ah_attr.is_global = 1;
qp_attr.ah_attr.grh.sgid_index = local->gid_index;
qp_attr.ah_attr.grh.dgid = remote->gid;
qp_attr.ah_attr.grh.hop_limit = 0xFF;
qp_attr.ah_attr.grh.traffic_class = 0;
}
ret = ibv_modify_qp(qp, &qp_attr,
IBV_QP_STATE | IBV_QP_AV |
IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER);
IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER |
0);
check (ret == 0, "Failed to change qp to rtr.");
}

Expand Down
6 changes: 4 additions & 2 deletions ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <infiniband/verbs.h>
#include <arpa/inet.h>

#define IB_MTU IBV_MTU_4096
#define IB_MTU IBV_MTU_1024
#define IB_PORT 1
#define IB_SL 0
#define IB_WR_ID_STOP 0xE000000000000000
Expand All @@ -28,6 +28,8 @@ static inline uint64_t ntohll (uint64_t x) {return x; }
/*
 * Addressing information for one endpoint's queue pair. Each side fills in
 * a local copy and swaps it with the peer through sock_set_qp_info() /
 * sock_get_qp_info(); modify_qp_to_rts() then consumes the local and remote
 * copies. The struct is declared packed and is transferred with
 * sizeof(struct QPInfo), so adding or reordering fields changes what goes
 * over the socket.
 * NOTE(review): the sock.c hunk in this change appears to send the struct
 * as raw bytes with no byte-order conversion -- confirm both peers agree.
 */
struct QPInfo {
/* port LID (from ibv_port_attr.lid); 0 makes modify_qp_to_rts() use the GID path */
uint16_t lid;
/* queue pair number (ibv_qp.qp_num); the peer uses it as dest_qp_num */
uint32_t qp_num;
/* filled by ibv_query_gid() when lid is 0 on an Ethernet link layer; the peer uses it as grh.dgid */
union ibv_gid gid;
/* GID table index the gid was queried with; used locally as grh.sgid_index */
uint8_t gid_index;
}__attribute__ ((packed));

enum MsgType {
Expand All @@ -36,7 +38,7 @@ enum MsgType {
MSG_REGULAR,
};

int modify_qp_to_rts (struct ibv_qp *qp, uint32_t qp_num, uint16_t lid);
int modify_qp_to_rts (struct ibv_qp *qp, struct QPInfo *local, struct QPInfo *remote);

int post_send (uint32_t req_size, uint32_t lkey, uint64_t wr_id,
uint32_t imm_data, struct ibv_qp *qp, char *buf);
Expand Down
13 changes: 9 additions & 4 deletions main.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <stdio.h>
#include <stdlib.h>

#include "debug.h"
#include "config.h"
Expand All @@ -16,20 +17,24 @@ int main (int argc, char *argv[])
{
int ret = 0;

if (argc == 5) {
if (argc == 7) {
config_info.is_server = false;
config_info.server_name = argv[1];
config_info.msg_size = atoi (argv[2]);
config_info.num_concurr_msgs = atoi (argv[3]);
config_info.sock_port = argv[4];
} else if (argc == 4) {
config_info.dev_index = atoi (argv[5]);
config_info.gid_index = atoi (argv[6]);
} else if (argc == 6) {
config_info.is_server = true;
config_info.msg_size = atoi (argv[1]);
config_info.num_concurr_msgs = atoi (argv[2]);
config_info.sock_port = argv[3];
config_info.dev_index = atoi (argv[4]);
config_info.gid_index = atoi (argv[5]);
} else {
printf ("Server: %s msg_size num_concurr_msgs sock_port\n", argv[0]);
printf ("Client: %s server_name msg_size num_concurr_msgs sock_port\n", argv[0]);
printf ("Server: %s msg_size num_concurr_msgs sock_port dev_index gid_index\n", argv[0]);
printf ("Client: %s server_name msg_size num_concurr_msgs sock_port dev_index gid_index\n", argv[0]);
return 0;
}

Expand Down
32 changes: 24 additions & 8 deletions setup_ib.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ int connect_qp_server ()
/* init local qp_info */
local_qp_info.lid = ib_res.port_attr.lid;
local_qp_info.qp_num = ib_res.qp->qp_num;
local_qp_info.gid_index = ib_res.gid_index;

if (local_qp_info.lid == 0 && ib_res.port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
ret = ibv_query_gid(ib_res.ctx, IB_PORT, local_qp_info.gid_index, &local_qp_info.gid);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A suggested fix for LINE#37:

ret = ibv_query_gid(ib_res.ctx, IB_PORT, config_info.gid_index, &local_qp_info.gid);

check (ret == 0, "failed to get gid");
}

/* get qp_info from client */
ret = sock_get_qp_info (peer_sockfd, &remote_qp_info);
check (ret == 0, "Failed to get qp_info from client");
Expand All @@ -41,8 +47,7 @@ int connect_qp_server ()
check (ret == 0, "Failed to send qp_info to client");

/* change send QP state to RTS */
ret = modify_qp_to_rts (ib_res.qp, remote_qp_info.qp_num,
remote_qp_info.lid);
ret = modify_qp_to_rts (ib_res.qp, &local_qp_info, &remote_qp_info);
check (ret == 0, "Failed to modify qp to rts");

log (LOG_SUB_HEADER, "Start of IB Config");
Expand Down Expand Up @@ -87,7 +92,13 @@ int connect_qp_client ()

local_qp_info.lid = ib_res.port_attr.lid;
local_qp_info.qp_num = ib_res.qp->qp_num;

local_qp_info.gid_index = ib_res.gid_index;

if (local_qp_info.lid == 0 && ib_res.port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
ret = ibv_query_gid(ib_res.ctx, IB_PORT, local_qp_info.gid_index, &local_qp_info.gid);
check (ret == 0, "failed to get gid");
}

/* send qp_info to server */
ret = sock_set_qp_info (peer_sockfd, &local_qp_info);
check (ret == 0, "Failed to send qp_info to server");
Expand All @@ -97,8 +108,7 @@ int connect_qp_client ()
check (ret == 0, "Failed to get qp_info from server");

/* change QP state to RTS */
ret = modify_qp_to_rts (ib_res.qp, remote_qp_info.qp_num,
remote_qp_info.lid);
ret = modify_qp_to_rts (ib_res.qp, &local_qp_info, &remote_qp_info);
check (ret == 0, "Failed to modify qp to rts");

log (LOG_SUB_HEADER, "IB Config");
Expand Down Expand Up @@ -135,9 +145,11 @@ int setup_ib ()
check(dev_list != NULL, "Failed to get ib device list.");

/* create IB context */
ib_res.ctx = ibv_open_device(*dev_list);
ib_res.ctx = ibv_open_device(dev_list[config_info.dev_index]);
check(ib_res.ctx != NULL, "Failed to open ib device.");

printf("using first device, name: %s, dev_name: %s\n", ib_res.ctx->device->name, ib_res.ctx->device->dev_name);

/* allocate protection domain */
ib_res.pd = ibv_alloc_pd(ib_res.ctx);
check(ib_res.pd != NULL, "Failed to allocate protection domain.");
Expand All @@ -146,6 +158,10 @@ int setup_ib ()
ret = ibv_query_port(ib_res.ctx, IB_PORT, &ib_res.port_attr);
check(ret == 0, "Failed to query IB port information.");

printf("number of gid in device: %d\n", ib_res.port_attr.gid_tbl_len);
printf("max mtu: %d\n", ib_res.port_attr.max_mtu);
ib_res.gid_index = config_info.gid_index;

/* register mr */
ib_res.ib_buf_size = config_info.msg_size * config_info.num_concurr_msgs;
ib_res.ib_buf = (char *) memalign (4096, ib_res.ib_buf_size);
Expand All @@ -172,8 +188,8 @@ int setup_ib ()
.send_cq = ib_res.cq,
.recv_cq = ib_res.cq,
.cap = {
.max_send_wr = ib_res.dev_attr.max_qp_wr,
.max_recv_wr = ib_res.dev_attr.max_qp_wr,
.max_send_wr = 1024,
.max_recv_wr = 1024,
.max_send_sge = 1,
.max_recv_sge = 1,
},
Expand Down
1 change: 1 addition & 0 deletions setup_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ struct IBRes {
struct ibv_qp *qp;
struct ibv_port_attr port_attr;
struct ibv_device_attr dev_attr;
uint8_t gid_index;

char *ib_buf;
size_t ib_buf_size;
Expand Down
12 changes: 2 additions & 10 deletions sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,8 @@ int sock_create_connect (char *server_name, char *port)
int sock_set_qp_info(int sock_fd, struct QPInfo *qp_info)
{
int n;
struct QPInfo tmp_qp_info;

tmp_qp_info.lid = htons(qp_info->lid);
tmp_qp_info.qp_num = htonl(qp_info->qp_num);

n = sock_write(sock_fd, (char *)&tmp_qp_info, sizeof(struct QPInfo));
n = sock_write(sock_fd, (char *)qp_info, sizeof(struct QPInfo));
check(n==sizeof(struct QPInfo), "write qp_info to socket.");

return 0;
Expand All @@ -161,14 +157,10 @@ int sock_set_qp_info(int sock_fd, struct QPInfo *qp_info)
int sock_get_qp_info(int sock_fd, struct QPInfo *qp_info)
{
int n;
struct QPInfo tmp_qp_info;

n = sock_read(sock_fd, (char *)&tmp_qp_info, sizeof(struct QPInfo));
n = sock_read(sock_fd, (char *)qp_info, sizeof(struct QPInfo));
check(n==sizeof(struct QPInfo), "read qp_info from socket.");

qp_info->lid = ntohs(tmp_qp_info.lid);
qp_info->qp_num = ntohl(tmp_qp_info.qp_num);

return 0;

error:
Expand Down