Skip to content

Commit

Permalink
Merge branch 'xdp_xmit-bulking'
Browse files Browse the repository at this point in the history
Jesper Dangaard Brouer says:

====================
This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames.

When the kernel is compiled with CONFIG_RETPOLINE, every indirect function
pointer (branch) call hurts performance. For XDP this has a huge
negative performance impact.

This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but
also prepares for further optimizations.  The DMA API's use of indirect
function pointer calls is the primary source of the regression.  It is
left for a followup patchset to use bulking calls towards the DMA API
(via the scatter-gather calls).

The other advantage of this API change is that drivers can more easily
amortize the cost of any sync/locking scheme over the bulk of
packets.  The assumption of the current API is that the driver
implementing the NDO will also allocate a dedicated XDP TX queue for
every CPU in the system, which is not always possible or practical to
configure. E.g. ixgbe cannot load an XDP program on a machine with
more than 96 CPUs, due to limited hardware TX queues.  E.g. virtio_net
is hard to configure as it requires manually increasing the
queues. E.g. the tun driver chooses to use a per XDP frame producer lock
modulo smp_processor_id over avail queues.

I have considered adding 'flags' to ndo_xdp_xmit, but it's not part of
this patchset.  This will be a followup patchset, once we know if this
will be needed (e.g. for non-map xdp_redirect flush-flag, and if
AF_XDP chooses to use ndo_xdp_xmit for TX).

---
V5: Fixed up issues spotted by Daniel and John

V4: Split out the patches from 4 to 8 patches.  I cannot split the
driver changes from the NDO change, but I've tried to isolate the NDO
change together with the driver change as much as possible.
====================

Signed-off-by: Alexei Starovoitov <[email protected]>
  • Loading branch information
Alexei Starovoitov committed May 25, 2018
2 parents f80acbd + a570e48 commit 10f6786
Show file tree
Hide file tree
Showing 16 changed files with 448 additions and 86 deletions.
26 changes: 19 additions & 7 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -3664,26 +3664,38 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
* @dev: netdev
* @xdp: XDP buffer
*
* Returns Zero if sent, else an error code
* Returns number of frames successfully sent. Frames that fail are
* free'ed via XDP return API.
*
* For error cases, a negative errno code is returned and no-frames
* are transmitted (caller must handle freeing frames).
**/
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
int err;
int drops = 0;
int i;

if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;

if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO;

err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
if (err != I40E_XDP_TX)
return -ENOSPC;
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;

return 0;
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
if (err != I40E_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}

return n - drops;
}

/**
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev);

/**
Expand Down
21 changes: 15 additions & 6 deletions drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}

static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
int err;
int drops = 0;
int i;

if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN;
Expand All @@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (unlikely(!ring))
return -ENXIO;

err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX)
return -ENOSPC;
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;

return 0;
err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}

return n - drops;
}

static void ixgbe_xdp_flush(struct net_device *dev)
Expand Down
37 changes: 24 additions & 13 deletions drivers/net/tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
Expand Down Expand Up @@ -1290,34 +1291,44 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};

static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile;
u32 numqueues;
int ret = 0;
int drops = 0;
int cnt = n;
int i;

rcu_read_lock();

numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) {
ret = -ENOSPC;
goto out;
rcu_read_unlock();
return -ENXIO; /* Caller will free/return all frames */
}

tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff.
*/
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
ret = -ENOSPC;

spin_lock(&tfile->tx_ring.producer_lock);
for (i = 0; i < n; i++) {
struct xdp_frame *xdp = frames[i];
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff.
*/
void *frame = tun_xdp_to_ptr(xdp);

if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
xdp_return_frame_rx_napi(xdp);
drops++;
}
}
spin_unlock(&tfile->tx_ring.producer_lock);

out:
rcu_read_unlock();
return ret;
return cnt - drops;
}

static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
Expand All @@ -1327,7 +1338,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame))
return -EOVERFLOW;

return tun_xdp_xmit(dev, frame);
return tun_xdp_xmit(dev, 1, &frame);
}

static void tun_xdp_flush(struct net_device *dev)
Expand Down
66 changes: 49 additions & 17 deletions drivers/net/virtio_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq);
}

static int __virtnet_xdp_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct send_queue *sq,
struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int err;

qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];

/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);

/* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP;
Expand All @@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0;
}

static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
/* Queue a single XDP frame for transmit on the current CPU's send queue.
 *
 * @vi:   virtio-net device state; supplies the send-queue array and the
 *        queue-pair counters used to pick the queue.
 * @xdpf: frame to transmit.
 *
 * Before queueing, every buffer that virtqueue_get_buf() hands back from
 * the chosen queue is released with xdp_return_frame().
 *
 * Returns whatever __virtnet_xdp_xmit_one() returns for @xdpf.  This
 * function itself does not release @xdpf when that call fails.
 */
static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
{
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;

/* Queue index is offset by the current CPU id; presumably the XDP TX
 * queues occupy the last xdp_queue_pairs slots of vi->sq -- TODO confirm.
 */
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];

/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);

return __virtnet_xdp_xmit_one(vi, sq, xdpf);
}

static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int drops = 0;
int err;
int i;

qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];

/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
Expand All @@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog)
return -ENXIO;

return __virtnet_xdp_xmit(vi, xdpf);
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);

for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];

err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
if (err) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
Expand Down Expand Up @@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf);
err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
Expand Down Expand Up @@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf);
err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page))
Expand Down
18 changes: 15 additions & 3 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);

/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct xdp_buff;

struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);

struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map);
struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);

Expand Down Expand Up @@ -573,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{
}

struct xdp_buff;
struct bpf_dtab_netdev;

/* Inline no-op variant of dev_map_enqueue(): ignores all three arguments
 * and always returns 0.
 * NOTE(review): presumably the fallback used when the real devmap
 * implementation is not compiled in -- confirm against the enclosing
 * #ifdef, which is outside this hunk.
 */
static inline
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return 0;
}

static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
Expand All @@ -587,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{
}

struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp,
struct net_device *dev_rx)
Expand Down
14 changes: 9 additions & 5 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1185,9 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
* int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
* This function is used to submit a XDP packet for transmit on a
* netdevice.
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
* This function is used to submit @n XDP packets for transmit on a
* netdevice. Returns number of frames successfully transmitted, frames
* that got dropped are freed/returned via xdp_return_frame().
* A negative return value indicates a general error invoking the ndo:
* no frames were xmit'ed and the core-caller will free all frames.
* TODO: Consider add flag to allow sending flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
Expand Down Expand Up @@ -1375,8 +1379,8 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
int (*ndo_xdp_xmit)(struct net_device *dev,
struct xdp_frame *xdp);
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp);
void (*ndo_xdp_flush)(struct net_device *dev);
};

Expand Down
5 changes: 3 additions & 2 deletions include/net/page_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct);

static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
static inline void page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
__page_pool_put_page(pool, page, false);
__page_pool_put_page(pool, page, allow_direct);
#endif
}
/* Very limited use-cases allow recycle direct */
Expand Down
1 change: 1 addition & 0 deletions include/net/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
}

void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);

int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
Expand Down
Loading

0 comments on commit 10f6786

Please sign in to comment.