Skip to content

Commit

Permalink
Merge branch 'ndo_xdp_xmit-cleanup'
Browse files Browse the repository at this point in the history
Jesper Dangaard Brouer says:

====================
As I mentioned in merge commit 10f6786 ("Merge branch 'xdp_xmit-bulking'")
I plan to change the API for ndo_xdp_xmit once more, by adding a flags
argument, which is done in this patchset.

I know it is late in the cycle (currently at rc7), but it would be
nice to avoid changing NDOs over several kernel releases, as it is
annoying to vendors and distro backporters, but it is not strictly
UAPI so it is allowed (according to Alexei).

The end-goal is getting rid of the ndo_xdp_flush operation, as it will
make it possible for drivers to implement a TXQ synchronization mechanism
that is not necessarily derived from the CPU id (smp_processor_id).

This patchset removes all callers of the ndo_xdp_flush operation, but
it doesn't take the last step of removing it from all drivers.  This
can be done later, or I can update the patchset on request.

Micro-benchmarks only show a very small performance improvement, for
map-redirect around ~2 ns, and for non-map redirect ~7 ns.  I've not
benchmarked this with CONFIG_RETPOLINE, but the performance benefit
should be more visible given we end-up removing an indirect call.

---
V2: Updated based on feedback from Song Liu <[email protected]>
====================

Signed-off-by: Alexei Starovoitov <[email protected]>
  • Loading branch information
Alexei Starovoitov committed Jun 3, 2018
2 parents 69b4507 + c1ece6b commit ea9916e
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 35 deletions.
14 changes: 12 additions & 2 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -3670,11 +3670,13 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
* For error cases, a negative errno code is returned and no-frames
* are transmitted (caller must handle freeing frames).
**/
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
struct i40e_ring *xdp_ring;
int drops = 0;
int i;

Expand All @@ -3684,17 +3686,25 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO;

if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;

xdp_ring = vsi->xdp_rings[queue_index];

for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;

err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
if (err != I40E_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}

if (unlikely(flags & XDP_XMIT_FLUSH))
i40e_xdp_ring_update_tail(xdp_ring);

return n - drops;
}

Expand Down
3 changes: 2 additions & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,8 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
void i40e_xdp_flush(struct net_device *dev);

/**
Expand Down
23 changes: 17 additions & 6 deletions drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10022,8 +10022,17 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}

static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
{
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*/
wmb();
writel(ring->next_to_use, ring->tail);
}

static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames)
struct xdp_frame **frames, u32 flags)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
Expand All @@ -10033,6 +10042,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN;

if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;

/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
Expand All @@ -10051,6 +10063,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
}
}

if (unlikely(flags & XDP_XMIT_FLUSH))
ixgbe_xdp_ring_update_tail(ring);

return n - drops;
}

Expand All @@ -10069,11 +10084,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
if (unlikely(!ring))
return;

/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*/
wmb();
writel(ring->next_to_use, ring->tail);
ixgbe_xdp_ring_update_tail(ring);

return;
}
Expand Down
25 changes: 18 additions & 7 deletions drivers/net/tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,16 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};

static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
static void __tun_xdp_flush_tfile(struct tun_file *tfile)
{
/* Notify and wake up reader process */
if (tfile->flags & TUN_FASYNC)
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
}

static int tun_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames, u32 flags)
{
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile;
Expand All @@ -1294,6 +1303,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
int cnt = n;
int i;

if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;

rcu_read_lock();

numqueues = READ_ONCE(tun->numqueues);
Expand Down Expand Up @@ -1321,6 +1333,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
}
spin_unlock(&tfile->tx_ring.producer_lock);

if (flags & XDP_XMIT_FLUSH)
__tun_xdp_flush_tfile(tfile);

rcu_read_unlock();
return cnt - drops;
}
Expand All @@ -1332,7 +1347,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame))
return -EOVERFLOW;

return tun_xdp_xmit(dev, 1, &frame);
return tun_xdp_xmit(dev, 1, &frame, 0);
}

static void tun_xdp_flush(struct net_device *dev)
Expand All @@ -1349,11 +1364,7 @@ static void tun_xdp_flush(struct net_device *dev)

tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
/* Notify and wake up reader process */
if (tfile->flags & TUN_FASYNC)
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
tfile->socket.sk->sk_data_ready(tfile->socket.sk);

__tun_xdp_flush_tfile(tfile);
out:
rcu_read_unlock();
}
Expand Down
9 changes: 8 additions & 1 deletion drivers/net/virtio_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
}

static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames)
int n, struct xdp_frame **frames, u32 flags)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
Expand All @@ -481,6 +481,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
int err;
int i;

if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;

qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];

Expand All @@ -504,6 +507,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
drops++;
}
}

if (flags & XDP_XMIT_FLUSH)
virtqueue_kick(sq->vq);

return n - drops;
}

Expand Down
7 changes: 4 additions & 3 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1185,13 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp,
* u32 flags);
* This function is used to submit @n XDP packets for transmit on a
* netdevice. Returns number of frames successfully transmitted, frames
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
* TODO: Consider add flag to allow sending flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
Expand Down Expand Up @@ -1380,7 +1380,8 @@ struct net_device_ops {
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp);
struct xdp_frame **xdp,
u32 flags);
void (*ndo_xdp_flush)(struct net_device *dev);
};

Expand Down
4 changes: 4 additions & 0 deletions include/net/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ enum xdp_mem_type {
MEM_TYPE_MAX,
};

/* XDP flags for ndo_xdp_xmit */
#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH

struct xdp_mem_info {
u32 type; /* enum xdp_mem_type, but known size type */
u32 id;
Expand Down
19 changes: 6 additions & 13 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
}

static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
struct xdp_bulk_queue *bq, u32 flags)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
Expand All @@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
prefetch(xdpf);
}

sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
if (sent < 0) {
err = sent;
sent = 0;
Expand Down Expand Up @@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev;

/* This is possible if the dev entry is removed by user space
* between xdp redirect and flush op.
Expand All @@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map)
__clear_bit(bit, bitmap);

bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev);
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
}
}

Expand Down Expand Up @@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);

if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);
bq_xmit_all(obj, bq, 0);

/* Ingress dev_rx will be the same for all xdp_frame's in
* bulk_queue, because bq stored per-CPU and must be flushed
Expand Down Expand Up @@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)

static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev;
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
struct xdp_bulk_queue *bq;
unsigned long *bitmap;

Expand All @@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
__clear_bit(dev->bit, bitmap);

bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);

fl->netdev_ops->ndo_xdp_flush(dev->dev);
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
}
}
}
Expand Down
3 changes: 1 addition & 2 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -3056,10 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
if (unlikely(!xdpf))
return -EOVERFLOW;

sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
if (sent <= 0)
return sent;
dev->netdev_ops->ndo_xdp_flush(dev);
return 0;
}

Expand Down

0 comments on commit ea9916e

Please sign in to comment.