Skip to content

Commit

Permalink
net/mlx5e: Support enhanced CQE compression
Browse files Browse the repository at this point in the history
The CQE compression feature improves performance by reducing the PCI bandwidth
bottleneck on CQE writes.
Enhanced CQE compression was introduced in ConnectX-6. It aims to reduce the
CPU utilization of SW-side packet decompression by eliminating the need to
rewrite the ownership bit, which is likely to cost a cache miss; the ownership
bit is replaced by a validity byte handled solely by HW.
Another advantage of the enhanced feature is that session packets are
available to SW as soon as a single CQE slot is filled, instead of
waiting for the session to close. This improves packet latency from NIC to
host.

Performance:
Following are the tested scenarios and results comparing basic and enhanced
CQE compression.

setup: IXIA 100GbE connected directly to port 0 and port 1 of
ConnectX-6 Dx 100GbE dual port.

Case #1 RX only, single flow goes to single queue:
IRQ rate reduced by ~ 30%, CPU utilization improved by 2%.

Case #2 IP forwarding from port 1 to port 0 single flow goes to
single queue:
Avg latency improved from 60us to 21us, frame loss improved from 0.5% to 0.0%.

Case #3 IP forwarding from port 1 to port 0 Max Throughput IXIA sends
100%, 8192 UDP flows, goes to 24 queues:
Enhanced is equal or slightly better than basic.

Testing the basic compression feature with this patch shows there is
no performance degradation of the basic compression feature.

Signed-off-by: Ofer Levi <[email protected]>
Reviewed-by: Tariq Toukan <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
  • Loading branch information
ofer70 authored and Saeed Mahameed committed Nov 12, 2022
1 parent 9458108 commit 2c925db
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 23 deletions.
2 changes: 2 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ enum {
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */
};

struct mlx5e_cq {
Expand All @@ -370,6 +371,7 @@ struct mlx5e_cq_decomp {
u8 mini_arr_idx;
u16 left;
u16 wqe_counter;
bool last_cqe_title;
} ____cacheline_aligned_in_smp;

enum mlx5e_dma_map_type {
Expand Down
10 changes: 8 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/params.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,13 +608,15 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;

mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d %s)\n",
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
BIT(params->log_rq_mtu_frames),
BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS),
MLX5_CAP_GEN(mdev, enhanced_cqe_compression) ?
"enhanced" : "basic");
}

void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
Expand Down Expand Up @@ -852,6 +854,10 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
MLX5_SET(cqc, cqc, cqe_compression_layout,
MLX5_CAP_GEN(mdev, enhanced_cqe_compression) ?
MLX5_CQE_COMPRESS_LAYOUT_ENHANCED :
MLX5_CQE_COMPRESS_LAYOUT_BASIC);
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
}

Expand Down
8 changes: 8 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,13 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);

/* For enhanced CQE compression packet processing. decompress
* session according to the enhanced layout.
*/
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) &&
MLX5_CAP_GEN(mdev, enhanced_cqe_compression))
__set_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state);

return 0;

err_destroy_rq:
Expand Down Expand Up @@ -1895,6 +1902,7 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

cqe->op_own = 0xf1;
cqe->validity_iteration_count = 0xff;
}

cq->mdev = mdev;
Expand Down
150 changes: 129 additions & 21 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
}

/* Snapshot a title CQE for the enhanced-compression decompress path.
 *
 * @rq:  receive queue whose decompression context (rq->cqd) is updated.
 * @cqe: CQE to be remembered as the title; copied in full into
 *       rq->cqd.title.
 *
 * When MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX is set in rq->state, only the copy
 * is performed. Otherwise rq->cqd.wqe_counter is also seeded from the copied
 * title, using a different formula for striding RQs than for the other WQ
 * type.
 */
static void mlx5e_read_enhanced_title_slot(struct mlx5e_rq *rq,
					   struct mlx5_cqe64 *cqe)
{
	struct mlx5e_cq_decomp *decomp = &rq->cqd;
	struct mlx5_cqe64 *saved = &decomp->title;

	memcpy(saved, cqe, sizeof(*saved));

	/* Expected common case: nothing more to record beyond the copy. */
	if (likely(test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)))
		return;

	if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
		/* Index of the entry following the title's wqe_counter. */
		decomp->wqe_counter =
			mlx5_wq_cyc_ctr2ix(&rq->wqe.wq,
					   be16_to_cpu(saved->wqe_counter) + 1);
		return;
	}

	/* Striding RQ: title stride index plus the strides it consumed. */
	decomp->wqe_counter = mpwrq_get_cqe_stride_index(saved) +
			      mpwrq_get_cqe_consumed_strides(saved);
}

static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
struct mlx5_cqwq *wq,
u32 cqcc)
Expand Down Expand Up @@ -175,6 +194,38 @@ static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
cqd->title.rss_hash_result = 0;
}

/* Expand one enhanced-layout compressed CQE and hand every resulting packet
 * to the RQ's rx handler.
 *
 * @rq:         receive queue owning the decompression context (rq->cqd).
 * @wq:         completion queue work queue; its consumer counter (wq->cc) is
 *              advanced by the number of mini CQEs handled.
 * @cqe:        the compressed CQE; copied into rq->cqd.mini_arr.
 * @budget_rem: remaining NAPI budget.
 *
 * Return: number of mini CQEs processed. If the CQE carries more mini CQEs
 * than @budget_rem, nothing is processed and @budget_rem is returned so the
 * caller sees its budget as exhausted.
 */
static u32 mlx5e_decompress_enhanced_cqe(struct mlx5e_rq *rq,
					 struct mlx5_cqwq *wq,
					 struct mlx5_cqe64 *cqe,
					 int budget_rem)
{
	struct mlx5e_cq_decomp *decomp = &rq->cqd;
	u32 nr_mini = get_cqe_enhanced_num_mini_cqes(cqe);
	u32 cc;
	u32 n;

	/* Never split a compression session across polls: if it does not fit
	 * in what is left of the budget, report the budget as consumed
	 * ('busy' napi indication) without touching the queue.
	 */
	if (unlikely(nr_mini > budget_rem))
		return budget_rem;

	cc = wq->cc;
	decomp->mini_arr_idx = 0;
	memcpy(decomp->mini_arr, cqe, sizeof(struct mlx5_cqe64));

	for (n = 0; n < nr_mini; n++) {
		mlx5e_decompress_cqe_no_hash(rq, wq, cc);
		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
				mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
				rq, &decomp->title);
		/* Step to the next mini CQE and the next consumer position. */
		decomp->mini_arr_idx++;
		cc++;
	}

	wq->cc = cc;
	rq->stats->cqe_compress_pkts += nr_mini;

	return nr_mini;
}

static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
struct mlx5_cqwq *wq,
int update_owner_only,
Expand Down Expand Up @@ -220,7 +271,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
rq, &cqd->title);
cqd->mini_arr_idx++;

return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem);
}

static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
Expand Down Expand Up @@ -2211,45 +2262,102 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
/* Poll the RX completion queue when enhanced CQE compression is in use.
 *
 * Takes the receive queue, its CQ work queue and the remaining budget, and
 * returns the amount of work done. Non-compressed CQEs are popped and passed
 * to rq->handle_rx_cqe; compressed CQEs are expanded through
 * mlx5e_decompress_enhanced_cqe(). The most recent non-compressed CQE is
 * tracked as a potential title for a compressed CQE that follows it.
 *
 * NOTE(review): this listing comes from a rendered diff without +/- markers.
 * Several lines below reference `cq`, `budget` and an `out` label that this
 * function does not declare, and a stray `do {` follows the final block;
 * they appear to belong to the pre-change mlx5e_poll_rx_cq() body - confirm
 * against the applied source.
 */
static int mlx5e_rx_cq_process_enhanced_cqe_comp(struct mlx5e_rq *rq,
struct mlx5_cqwq *cqwq,
int budget_rem)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
struct mlx5_cqwq *cqwq = &cq->wq;
struct mlx5_cqe64 *cqe;
struct mlx5_cqe64 *cqe, *title_cqe = NULL;
struct mlx5e_cq_decomp *cqd = &rq->cqd;
int work_done = 0;

if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return 0;
/* Nothing to do when no valid CQE is available. */
cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq);
if (!cqe)
return work_done;

if (rq->cqd.left) {
work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
if (work_done >= budget)
goto out;
/* The previous poll flagged its last CQE as a possible title; if this poll
 * starts with a compressed CQE, count a compression block and clear the flag.
 */
if (cqd->last_cqe_title &&
(mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED)) {
rq->stats->cqe_compress_blks++;
cqd->last_cqe_title = false;
}

cqe = mlx5_cqwq_get_cqe(cqwq);
if (!cqe) {
if (unlikely(work_done))
goto out;
return 0;
do {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
/* A non-compressed CQE seen earlier in this poll becomes the title. */
if (title_cqe) {
mlx5e_read_enhanced_title_slot(rq, title_cqe);
title_cqe = NULL;
rq->stats->cqe_compress_blks++;
}
/* Pass only the budget that is still unused. */
work_done +=
mlx5e_decompress_enhanced_cqe(rq, cqwq, cqe,
budget_rem - work_done);
continue;
}
/* Non-compressed CQE: remember it as a title candidate, then handle it. */
title_cqe = cqe;
mlx5_cqwq_pop(cqwq);

INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
rq, cqe);
work_done++;
} while (work_done < budget_rem &&
(cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)));

/* last cqe might be title on next poll bulk */
if (title_cqe) {
mlx5e_read_enhanced_title_slot(rq, title_cqe);
cqd->last_cqe_title = true;
}

do {
return work_done;
}

/* Poll the RX completion queue using the basic CQE compression layout.
 *
 * Takes the receive queue, its CQ work queue and the remaining budget, and
 * returns the amount of work done. A decompression session left over in
 * rq->cqd.left is continued first; after that, CQEs are consumed while budget
 * remains and mlx5_cqwq_get_cqe() yields one.
 *
 * NOTE(review): this listing comes from a rendered diff without +/- markers.
 * The `budget - work_done);` line and the trailing `} while (...)` line
 * appear to be pre-change lines (this function has no `budget` parameter and
 * uses a `while` loop) - confirm against the applied source.
 */
static int mlx5e_rx_cq_process_basic_cqe_comp(struct mlx5e_rq *rq,
struct mlx5_cqwq *cqwq,
int budget_rem)
{
struct mlx5_cqe64 *cqe;
int work_done = 0;

/* Resume a compression session that was left unfinished. */
if (rq->cqd.left)
work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget_rem);

while (work_done < budget_rem && (cqe = mlx5_cqwq_get_cqe(cqwq))) {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
/* Start a new session, passing only the budget that is still unused. */
work_done +=
mlx5e_decompress_cqes_start(rq, cqwq,
budget - work_done);
budget_rem - work_done);
continue;
}

/* Non-compressed CQE: consume it and hand it to the rx handler. */
mlx5_cqwq_pop(cqwq);

INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
rq, cqe);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
work_done++;
}

return work_done;
}

int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
struct mlx5_cqwq *cqwq = &cq->wq;
int work_done;

if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return 0;

if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state))
work_done = mlx5e_rx_cq_process_enhanced_cqe_comp(rq, cqwq,
budget);
else
work_done = mlx5e_rx_cq_process_basic_cqe_comp(rq, cqwq,
budget);

if (work_done == 0)
return 0;

out:
if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
mlx5e_shampo_flush_skb(rq, NULL, false);

Expand Down
17 changes: 17 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/wq.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,23 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
return cqe;
}

/* Fetch the CQE at the current consumer index for a CQ that uses the
 * validity byte instead of the ownership bit.
 *
 * Return: the CQE when its validity_iteration_count equals the low 8 bits of
 * the queue's wrap count, NULL otherwise.
 */
static inline
struct mlx5_cqe64 *mlx5_cqwq_get_cqe_enahnced_comp(struct mlx5_cqwq *wq)
{
	u8 expected_iter = mlx5_cqwq_get_wrap_cnt(wq) & 0xff;
	struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, mlx5_cqwq_get_ci(wq));

	if (cqe->validity_iteration_count == expected_iter) {
		/* ensure cqe content is read after cqe ownership bit/validity byte */
		dma_rmb();
		return cqe;
	}

	return NULL;
}

static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
{
return (u32)wq->fbc.sz_m1 + 1;
Expand Down
6 changes: 6 additions & 0 deletions include/linux/mlx5/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
return cqe->op_own >> 4;
}

/* Number of mini CQEs carried by an enhanced-layout compressed CQE.
 *
 * The count is read through get_cqe_opcode() and is stored zero based, so one
 * is added to obtain the actual number.
 */
static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe)
{
	u8 zero_based_cnt = get_cqe_opcode(cqe);

	return zero_based_cnt + 1;
}

static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
{
return (cqe->lro.tcppsh_abort_dupack >> 6) & 1;
Expand Down

0 comments on commit 2c925db

Please sign in to comment.