net/mlx5e: RX, Enable skb page recycling through the page_pool
Start using the page_pool skb recycling api to recycle all pages back to
the page pool and stop using atomic page reference counting.

The mlx5e driver used to manage in-flight pages using page refcounting:
for each fragment there were 2 atomic write operations (one when
building the skb and one on skb release).

The page_pool api introduced a method to track page fragments more
optimally:
* The page's pp_frag_count is set to a large bias on page alloc
  (1 x atomic write operation).
* The driver tracks the actual page fragments in a non-atomic variable.
* When the skb is recycled, pp_frag_count is decremented
  (atomic write operation).
* When the page is released in the driver, the number of unused
  fragments (relative to the bias) is deducted from pp_frag_count
  (atomic write operation).
* The last page defragmentation costs only an atomic read.

So in total there are `number of fragments + 1` atomic write ops, as
opposed to the previous `2 * number of fragments` atomic write ops.

Pages are wrapped in an mlx5e_frag_page structure which also holds the
number of fragments. This makes it easy to count the fragments in the
driver.
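
A rough sketch of the resulting alloc/release helpers, assuming the
page_pool fragment api (the helper names and the bias value are
illustrative, not the exact driver code; the per-fragment decrement
happens when a page_pool-aware skb is freed):

#include <net/page_pool.h>

#define BIAS_MAX	(PAGE_SIZE / 64)	/* illustrative bias value */

static int frag_page_alloc(struct page_pool *pool,
			   struct mlx5e_frag_page *fp)
{
	struct page *page = page_pool_dev_alloc_pages(pool);

	if (unlikely(!page))
		return -ENOMEM;

	/* 1 atomic write: seed pp_frag_count with a large bias. */
	page_pool_fragment_page(page, BIAS_MAX);
	fp->page = page;
	fp->frags = 0;	/* bumped non-atomically per fragment used */
	return 0;
}

static void frag_page_release(struct page_pool *pool,
			      struct mlx5e_frag_page *fp, bool recycle)
{
	u16 unused = BIAS_MAX - fp->frags;

	/* 1 atomic write: deduct the never-used fragments. A zero
	 * return means this was the last reference, so hand the page
	 * back to the pool; skb recycling drops the remaining ones.
	 */
	if (page_pool_defrag_page(fp->page, unused) == 0)
		page_pool_put_defragged_page(pool, fp->page, -1, recycle);
}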

This change brings performance improvements for the case when the old
rx page_cache had low recycling rates due to head-of-queue blocking.
For an iperf3 TCP test with a single stream, on a single core (iperf
and the receive queue running on the same core), the following
improvements can be noticed:

* Striding rq:
  - before (net-next baseline): bitrate = 30.1 Gbits/sec
  - after                     : bitrate = 31.4 Gbits/sec (diff: 4.14 %)

* Legacy rq:
  - before (net-next baseline): bitrate = 30.2 Gbits/sec
  - after                     : bitrate = 33.0 Gbits/sec (diff: 8.48 %)

There are 2 temporary performance degradations introduced:

1) TCP streams that had a good recycling rate with the old page_cache
   see a degradation for both striding and legacy rq. This is due to
   very low page_pool cache recycling: the pages are released during
   skb recycling, which returns them to the page_pool ring for safety.
   The following patches in this series tackle this problem by
   deferring the page release in the driver to increase the chance of
   pages being recycled to the cache.

2) XDP performance is now lower (4-5 %) due to the higher number of
   atomic operations used for fragment management. But this opens the
   door for supporting multiple packets per page in XDP, which will
   bring a big gain.

Otherwise, performance is similar to baseline.

Signed-off-by: Dragos Tatulea <[email protected]>
Reviewed-by: Tariq Toukan <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
dtatulea authored and Saeed Mahameed committed Mar 28, 2023
1 parent 4a5c5e2 commit 6f57428
Showing 5 changed files with 121 additions and 88 deletions.
12 changes: 9 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -600,16 +600,22 @@ struct mlx5e_icosq {
 	struct work_struct recover_work;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_frag_page {
+	struct page *page;
+	u16 frags;
+};
+
 struct mlx5e_wqe_frag_info {
 	union {
-		struct page **pagep;
+		struct mlx5e_frag_page *frag_page;
 		struct xdp_buff **xskp;
 	};
 	u32 offset;
 	bool last_in_page;
 };
 
 union mlx5e_alloc_units {
+	DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages);
 	DECLARE_FLEX_ARRAY(struct page *, pages);
 	DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs);
 };
@@ -666,15 +672,15 @@ struct mlx5e_rq_frags_info {
 struct mlx5e_dma_info {
 	dma_addr_t addr;
 	union {
-		struct page **pagep;
+		struct mlx5e_frag_page *frag_page;
 		struct page *page;
 	};
 };
 
 struct mlx5e_shampo_hd {
 	u32 mkey;
 	struct mlx5e_dma_info *info;
-	struct page **pages;
+	struct mlx5e_frag_page *pages;
 	u16 curr_page_index;
 	u16 hd_per_wq;
 	u16 hd_per_wqe;
3 changes: 1 addition & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -65,7 +65,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
 
 /* RX */
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
@@ -488,7 +487,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
 
 static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
 {
-	size_t isz = struct_size(rq->mpwqe.info, alloc_units.pages, rq->mpwqe.pages_per_wqe);
+	size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe);
 
 	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
 }
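
For context on the struct_size()/array_size() arithmetic above: struct
mlx5e_mpw_info ends in the union mlx5e_alloc_units flexible array, so
consecutive entries are laid out back to back and must be addressed by
byte offset. A minimal standalone sketch of the same pattern (struct
and function names are illustrative):

#include <linux/overflow.h>

struct entry {
	int hdr;
	struct page *pages[];	/* flexible array member */
};

/* Each entry occupies struct_size(e, pages, n) bytes, so entry i
 * starts i * that size into the buffer; plain e[i] indexing would
 * ignore the flexible array tail.
 */
static struct entry *entry_at(void *base, int i, int n)
{
	size_t isz = struct_size((struct entry *)base, pages, n);

	return (struct entry *)((char *)base + array_size(i, isz));
}
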
3 changes: 2 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -523,7 +523,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 			break;
 		case MLX5E_XDP_XMIT_MODE_PAGE:
 			/* XDP_TX from the regular RQ */
-			mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
+			page_pool_put_defragged_page(xdpi.page.rq->page_pool,
+						     xdpi.page.page, -1, recycle);
 			break;
 		case MLX5E_XDP_XMIT_MODE_XSK:
 			/* AF_XDP send */
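
A note on the replacement call: page_pool_put_defragged_page() expects
the page's fragment accounting to be settled already. The dma_sync_size
of -1 asks the pool to sync the whole page for device reuse, and the
recycle flag maps to allow_direct, i.e. whether the page may go straight
into the pool's lockless per-CPU cache (safe only from NAPI context).
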
11 changes: 6 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -294,7 +294,7 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
 	size_t alloc_size;
 
 	alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info,
-						   alloc_units.pages,
+						   alloc_units.frag_pages,
 						   rq->mpwqe.pages_per_wqe));
 
 	rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
@@ -509,7 +509,8 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
 
 	WARN_ON(rq->xsk_pool);
 
-	next_frag.pagep = &rq->wqe.alloc_units->pages[0];
+	next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0];
+
 	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
 		struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
 		struct mlx5e_wqe_frag_info *frag =
@@ -519,7 +520,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
 		for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
 			if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
 				/* Pages are assigned at runtime. */
-				next_frag.pagep++;
+				next_frag.frag_page++;
 				next_frag.offset = 0;
 				if (prev)
 					prev->last_in_page = true;
@@ -563,7 +564,7 @@ static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node)
 	if (rq->xsk_pool)
 		aus_sz = sizeof(*aus->xsk_buffs);
 	else
-		aus_sz = sizeof(*aus->pages);
+		aus_sz = sizeof(*aus->frag_pages);
 
 	aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node);
 	if (!aus)
@@ -831,7 +832,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
 		struct page_pool_params pp_params = { 0 };
 
 		pp_params.order = 0;
-		pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+		pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG;
 		pp_params.pool_size = pool_size;
 		pp_params.nid = node;
 		pp_params.dev = rq->pdev;
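
The new PP_FLAG_PAGE_FRAG bit enables fragment tracking on the return
path: with it set, the pool decrements pp_frag_count when a page comes
back and recycles the page only once the count drops to zero; without
it, every returned page is treated as the last (and only) fragment.
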
[diff for the remaining changed file, drivers/net/ethernet/mellanox/mlx5/core/en_rx.c, did not render]
