
Commit

Merge branch 'net_prefetch-API'
Tariq Toukan says:

====================
net_prefetch API

This patchset adds a common net API for L1 cacheline size-aware prefetch.

Patch 1 introduces the common API in net and converts the existing drivers to use it.
Patches 2 and 3 add usage in the mlx4 and mlx5 Ethernet drivers.

Series generated against net-next commit:
079f921 Merge tag 'batadv-next-for-davem-20200824' of git://git.open-mesh.org/linux-merge
====================

Signed-off-by: David S. Miller <[email protected]>
davem330 committed Aug 26, 2020
2 parents 04e006b + aed4d4c commit 751e425
Showing 17 changed files with 51 additions and 86 deletions.
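Before the per-file hunks, here is the shape of the change. Nearly every driver below open-codes the same sequence; a minimal sketch of the before and after (variable names are illustrative — each driver uses its own):

	/* Before: open-coded in each driver's rx path */
	prefetch(va);
	#if L1_CACHE_BYTES < 128
	prefetch(va + L1_CACHE_BYTES);
	#endif

	/* After: one call to the common helper added in include/linux/netdevice.h */
	net_prefetch(va);

Either way, roughly 128 bytes of packet headers end up covered: a single prefetch suffices on CPUs with 128-byte L1 cachelines, while CPUs with 64-byte lines need the second prefetch, and the #if compiles it out where it is unnecessary.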
5 changes: 1 addition & 4 deletions drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -2372,10 +2372,7 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
 	if (fl->use_pages) {
 		void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
 
-		prefetch(addr);
-#if L1_CACHE_BYTES < 128
-		prefetch(addr + L1_CACHE_BYTES);
-#endif
+		net_prefetch(addr);
 		__refill_fl(adap, fl);
 		if (lro > 0) {
 			lro_add_page(adap, qs, fl,
5 changes: 1 addition & 4 deletions drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -557,10 +557,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 	va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	skb = *out_skb = napi_alloc_skb(&ring_data->napi,
 					HNS_RX_HEAD_SIZE);
5 changes: 1 addition & 4 deletions drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3091,10 +3091,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
 	 * lines. In such a case, single fetch would suffice to cache in the
 	 * relevant part of the header.
 	 */
-	prefetch(ring->va);
-#if L1_CACHE_BYTES < 128
-	prefetch(ring->va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(ring->va);
 
 	if (!skb) {
 		ret = hns3_alloc_skb(ring, length, ring->va);
5 changes: 1 addition & 4 deletions drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -310,10 +310,7 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
 			 rx_buffer->page_offset;
 
 	/* prefetch first cache line of first page */
-	prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
-	prefetch((void *)((u8 *)page_addr + L1_CACHE_BYTES));
-#endif
+	net_prefetch(page_addr);
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
12 changes: 4 additions & 8 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1992,10 +1992,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data);
+
 	/* Note, we get here by enabling legacy-rx via:
 	 *
 	 *    ethtool --set-priv-flags <dev> legacy-rx on
@@ -2078,10 +2076,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 	 * likely have a consumer accessing first few bytes of meta
 	 * data, and then actual data.
 	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data_meta);
+
 	/* build an skb around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);
 	if (unlikely(!skb))
11 changes: 3 additions & 8 deletions drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1309,10 +1309,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
 		return NULL;
 	/* prefetch first cache line of first page */
 	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
@@ -1376,10 +1373,8 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
 		return NULL;
 	/* prefetch first cache line of first page */
 	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
+
 	/* build an skb around the page buffer */
 	skb = build_skb(va - IAVF_SKB_PAD, truesize);
 	if (unlikely(!skb))
10 changes: 2 additions & 8 deletions drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -919,10 +919,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	 * likely have a consumer accessing first few bytes of meta
 	 * data, and then actual data.
 	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
-#endif
+	net_prefetch(xdp->data_meta);
 	/* build an skb around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);
 	if (unlikely(!skb))
@@ -964,10 +961,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
-#endif /* L1_CACHE_BYTES */
+	net_prefetch(xdp->data);
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
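Worth noting in the ice hunks above: the old code prefetched xdp->data_meta but offset the second, conditional prefetch from xdp->data, so the two fetches could target unrelated lines. Converting to net_prefetch(xdp->data_meta) derives both prefetches from the same base address.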
10 changes: 2 additions & 8 deletions drivers/net/ethernet/intel/igb/igb_main.c
@@ -8047,10 +8047,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
@@ -8104,10 +8101,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* build an skb around the page buffer */
 	skb = build_skb(va - IGB_SKB_PAD, truesize);
10 changes: 2 additions & 8 deletions drivers/net/ethernet/intel/igc/igc_main.c
@@ -1550,10 +1550,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* build an skb around the page buffer */
 	skb = build_skb(va - IGC_SKB_PAD, truesize);
@@ -1589,10 +1586,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
11 changes: 3 additions & 8 deletions drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2095,10 +2095,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data);
+
 	/* Note, we get here by enabling legacy-rx via:
 	 *
 	 *    ethtool --set-priv-flags <dev> legacy-rx on
@@ -2161,10 +2159,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 	 * likely have a consumer accessing first few bytes of meta
 	 * data, and then actual data.
 	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data_meta);
 
 	/* build an skb to around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);
11 changes: 3 additions & 8 deletions drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -866,10 +866,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data);
+
 	/* Note, we get here by enabling legacy-rx via:
 	 *
 	 *    ethtool --set-priv-flags <dev> legacy-rx on
@@ -947,10 +945,7 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 	 * have a consumer accessing first few bytes of meta data,
 	 * and then actual data.
 	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data_meta);
 
 	/* build an skb around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -705,7 +705,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud

 	frags = ring->rx_info + (index << priv->log_rx_info);
 	va = page_address(frags[0].page) + frags[0].page_offset;
-	prefetchw(va);
+	net_prefetchw(va);
 	/*
 	 * make sure we read the CQE after we read the ownership bit
 	 */
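The mlx4 and mlx5 hunks use the write variant. prefetchw() hints that the cache line is about to be written, so CPUs that support it can fetch the line in an exclusive state up front; net_prefetchw() applies the same two-line treatment as net_prefetch(). A hedged sketch of when each helper applies, simplified from the call sites in this series:

	/* Read path: headers will be parsed, not modified */
	net_prefetch(xdp->data);

	/* Write path: the driver is about to fill this memory,
	 * e.g. an xdp_frame data area or a TX descriptor (WQE)
	 */
	net_prefetchw(session->wqe->data);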
4 changes: 2 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -201,7 +201,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
 	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
 	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
-	prefetchw(session->wqe->data);
+	net_prefetchw(session->wqe->data);
 	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
 	session->pkt_count = 0;
 
@@ -322,7 +322,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,

 	struct mlx5e_xdpsq_stats *stats = sq->stats;
 
-	prefetchw(wqe);
+	net_prefetchw(wqe);
 
 	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
 		stats->err++;
4 changes: 2 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -49,7 +49,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 	xdp->data_end = xdp->data + cqe_bcnt32;
 	xdp_set_data_meta_invalid(xdp);
 	xsk_buff_dma_sync_for_cpu(xdp);
-	prefetch(xdp->data);
+	net_prefetch(xdp->data);
 
 	rcu_read_lock();
 	consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
@@ -100,7 +100,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 	xdp->data_end = xdp->data + cqe_bcnt;
 	xdp_set_data_meta_invalid(xdp);
 	xsk_buff_dma_sync_for_cpu(xdp);
-	prefetch(xdp->data);
+	net_prefetch(xdp->data);
 
 	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
 		rq->stats->wqe_err++;
13 changes: 6 additions & 7 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/prefetch.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
@@ -1141,8 +1140,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,

 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
 				      frag_size, DMA_FROM_DEVICE);
-	prefetchw(va); /* xdp_frame data area */
-	prefetch(data);
+	net_prefetchw(va); /* xdp_frame data area */
+	net_prefetch(data);
 
 	rcu_read_lock();
 	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
@@ -1184,7 +1183,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 		return NULL;
 	}
 
-	prefetchw(skb->data);
+	net_prefetchw(skb->data);
 
 	while (byte_cnt) {
 		u16 frag_consumed_bytes =
@@ -1399,7 +1398,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		return NULL;
 	}
 
-	prefetchw(skb->data);
+	net_prefetchw(skb->data);
 
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
 		di++;
@@ -1452,8 +1451,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,

 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
 				      frag_size, DMA_FROM_DEVICE);
-	prefetchw(va); /* xdp_frame data area */
-	prefetch(data);
+	net_prefetchw(va); /* xdp_frame data area */
+	net_prefetch(data);
 
 	rcu_read_lock();
 	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
3 changes: 1 addition & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/prefetch.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
 #include <net/udp.h>
@@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 		return NULL;
 	}
 
-	prefetchw(skb->data);
+	net_prefetchw(skb->data);
 	skb_reserve(skb, NET_IP_ALIGN);
 
 	/* Reserve for ethernet and IP header */
16 changes: 16 additions & 0 deletions include/linux/netdevice.h
@@ -2193,6 +2193,22 @@ int netdev_get_num_tc(struct net_device *dev)
 	return dev->num_tc;
 }
 
+static inline void net_prefetch(void *p)
+{
+	prefetch(p);
+#if L1_CACHE_BYTES < 128
+	prefetch((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
+static inline void net_prefetchw(void *p)
+{
+	prefetchw(p);
+#if L1_CACHE_BYTES < 128
+	prefetchw((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
 void netdev_unbind_sb_channel(struct net_device *dev,
 			      struct net_device *sb_dev);
 int netdev_bind_sb_channel_queue(struct net_device *dev,
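To close, a sketch of how a driver rx path would typically call the new helper. This is illustrative only: the foo_* names and FOO_SKB_PAD are invented here, modeled on the igb/igc call sites above, and are not part of the series.

	static struct sk_buff *foo_build_skb(struct foo_ring *ring, void *va,
					     unsigned int truesize)
	{
		struct sk_buff *skb;

		/* Warm up to 128B of headers: one prefetch on CPUs with
		 * 128B L1 lines, two on CPUs with 64B lines; the helper
		 * hides the #if.
		 */
		net_prefetch(va);

		/* build an skb around the page buffer */
		skb = build_skb(va - FOO_SKB_PAD, truesize);
		if (unlikely(!skb))
			return NULL;

		skb_reserve(skb, FOO_SKB_PAD);
		return skb;
	}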
