Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] Generic metadata AF XDP #7

Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
5b67c13
uapi: add metadata flag to xdp flags
mswiatko Jul 21, 2021
2590b71
net: include xdp generic metadata definition
mswiatko Aug 10, 2021
c354ec1
ice: use xdp generic metadata
mswiatko Aug 10, 2021
d7691ff
libbpf: Return non-null error on failures in libbpf_find_prog_btf_id()
qmonnet Jul 29, 2021
caca8ad
libbpf: Rename btf__load() as btf__load_into_kernel()
qmonnet Jul 29, 2021
10d0b3b
libbpf: Rename btf__get_from_id() as btf__load_from_kernel_by_id()
qmonnet Jul 29, 2021
8f2e323
tools: Free BTF objects at various locations
qmonnet Jul 29, 2021
c0915ba
tools: Replace btf__get_from_id() with btf__load_from_kernel_by_id()
qmonnet Jul 29, 2021
54e9c44
libbpf: Add split BTF support for btf__load_from_kernel_by_id()
qmonnet Jul 29, 2021
59406b0
igc: Fix race condition in PTP Tx code
Jul 29, 2020
381d520
igc: Retrieve the TX timestamp directly (instead of in a interrupt)
vcgomes Apr 13, 2021
363fe5d
igc: Add support for multiple in-flight TX timestamps
vcgomes Apr 20, 2021
6e97273
igc: Use irq safe locks for timestamping
vcgomes Apr 24, 2021
1a083b7
tools: Add XDP_FLAGS_USE_METADATA flag
edersondisouza Aug 19, 2021
f58530a
xdp, net: Allow XDP_FLAGS_USE_METADATA to be used for link XDP
edersondisouza Aug 19, 2021
6b318b3
bpf: Export btf_obj_id and bpf_get_btf_vmlinux symbols
edersondisouza Aug 19, 2021
bcdbd55
bpf: Add btf_get_from_module function
edersondisouza Aug 19, 2021
f1f4fff
igc: XDP packet RX timestamp
edersondisouza Aug 19, 2021
3acf07a
igc: XDP packet TX timestamp
edersondisouza Jun 23, 2021
46d8aa0
ethtool,igc: Add "xdp_headroom" driver info
edersondisouza Jun 23, 2021
01a8c28
libbpf: Helpers to access XDP frame metadata
edersondisouza Jun 23, 2021
fe782fa
libbpf: Helpers to access XDP hints based on BTF definitions
edersondisouza Jul 20, 2021
613b86c
samples/bpf: XDP hints AF_XDP example
edersondisouza Jun 23, 2021
dd4f41b
libbpf,xdp,igc: Use union to split XDP TX and RX metadata
edersondisouza Sep 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions drivers/net/ethernet/intel/ice/ice.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,8 @@ struct ice_vsi {
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */

bool xdp_metadata_support; /* true if VSI should support xdp meta */

/* setup back reference, to which aggregator node this VSI
* corresponds to
*/
Expand Down
8 changes: 6 additions & 2 deletions drivers/net/ethernet/intel/ice/ice_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2377,6 +2377,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->netdev = NULL;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
xdp_ring->xdp_metadata_support = vsi->xdp_metadata_support;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
Expand Down Expand Up @@ -2605,7 +2606,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
*/
static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
struct netlink_ext_ack *extack, u32 flags)
{
int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
Expand All @@ -2625,6 +2626,9 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
}
}

if (flags & XDP_FLAGS_USE_METADATA)
vsi->xdp_metadata_support = true;

if (!ice_is_xdp_ena_vsi(vsi) && prog) {
vsi->num_xdp_txq = vsi->alloc_rxq;
xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
Expand Down Expand Up @@ -2678,7 +2682,7 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)

switch (xdp->command) {
case XDP_SETUP_PROG:
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack, xdp->flags);
case XDP_SETUP_XSK_POOL:
return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
xdp->xsk.queue_id);
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,9 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
offset;
xdp_prepare_buff(&xdp, hard_start, offset, size, true);

if (likely(rx_ring->xdp_metadata_support))
ice_xdp_set_meta(&xdp, rx_desc);
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_txrx.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ struct ice_ring {
u16 q_handle; /* Queue handle per TC */

u8 ring_active:1; /* is ring online or not */
u8 xdp_metadata_support:1; /* is xdp metadata support */

u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
Expand All @@ -301,6 +302,8 @@ struct ice_ring {
/* CL3 - 3rd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
struct sk_buff *skb;


/* CLX - the below items are only accessed infrequently and should be
* in their own cache line if possible
*/
Expand Down
10 changes: 10 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_txrx_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}

/**
 * ice_xdp_set_meta - fill generic XDP metadata in front of the packet data
 * @xdp: XDP buffer whose data pointer is already set up
 * @desc: Rx flex descriptor the metadata values are read from
 *
 * Places a struct xdp_meta_generic immediately before xdp->data, copies the
 * VLAN id and RSS hash out of the descriptor (converted from little endian)
 * and points xdp->data_meta at it.
 *
 * NOTE(review): nothing here checks that the headroom before xdp->data is
 * large enough for the metadata struct - confirm the caller guarantees it.
 */
static inline void ice_xdp_set_meta(struct xdp_buff *xdp, union ice_32b_rx_flex_desc *desc)
{
	struct ice_32b_rx_flex_desc_nic *nic_desc;
	struct xdp_meta_generic *meta;

	nic_desc = (struct ice_32b_rx_flex_desc_nic *)desc;
	meta = xdp->data - sizeof(*meta);

	meta->rxcvid = le16_to_cpu(nic_desc->flex_ts.flex.vlan_id);
	meta->hash = le32_to_cpu(nic_desc->rss_hash);

	xdp->data_meta = meta;
}

void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
Expand Down
29 changes: 25 additions & 4 deletions drivers/net/ethernet/intel/igc/igc.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/timecounter.h>
#include <linux/net_tstamp.h>
#include <linux/if_xdp.h>

#include "igc_hw.h"

Expand Down Expand Up @@ -65,6 +66,17 @@ struct igc_rx_packet_stats {
u64 other_packets;
};

#define IGC_MAX_TX_TSTAMP_TIMERS 4

/* One in-flight Tx hardware timestamp request. struct igc_adapter holds an
 * array of IGC_MAX_TX_TSTAMP_TIMERS of these (tx_tstamp[]).
 */
struct igc_tx_timestamp_request {
/* skb waiting for its timestamp, held via skb_get(); a NULL skb marks
 * the slot as free when a new request looks for one.
 */
struct sk_buff *skb;
/* jiffies value recorded when the request was made */
unsigned long start;
/* NOTE(review): presumably the IGC_TSYNCTXCTL_TXTT_* valid bit for this
 * slot - not set or read in the visible code, confirm.
 */
u32 mask;
/* NOTE(review): presumably the low/high timestamp register offsets for
 * this slot - not set or read in the visible code, confirm.
 */
u32 regl;
u32 regh;
/* Tx flags handed back to the transmit path, which ORs them into the
 * descriptor tx_flags together with IGC_TX_FLAGS_TSTAMP.
 */
u32 flags;
};

struct igc_ring_container {
struct igc_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
Expand Down Expand Up @@ -214,10 +226,12 @@ struct igc_adapter {

struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_caps;
struct work_struct ptp_tx_work;
struct sk_buff *ptp_tx_skb;
/* Access to ptp_tx_skb and ptp_tx_start is protected by the
* ptp_tx_lock.
*/
spinlock_t ptp_tx_lock;
struct igc_tx_timestamp_request tx_tstamp[IGC_MAX_TX_TSTAMP_TIMERS];
struct hwtstamp_config tstamp_config;
unsigned long ptp_tx_start;
unsigned int ptp_flags;
/* System time value lock */
spinlock_t tmreg_lock;
Expand All @@ -237,6 +251,9 @@ struct igc_adapter {
struct timespec64 start;
struct timespec64 period;
} perout[IGC_N_PEROUT];

u32 btf_id;
bool btf_enabled;
};

void igc_up(struct igc_adapter *adapter);
Expand Down Expand Up @@ -385,7 +402,6 @@ enum igc_state_t {
__IGC_TESTING,
__IGC_RESETTING,
__IGC_DOWN,
__IGC_PTP_TX_IN_PROGRESS,
};

enum igc_tx_flags {
Expand All @@ -397,6 +413,10 @@ enum igc_tx_flags {
/* olinfo flags */
IGC_TX_FLAGS_IPV4 = 0x10,
IGC_TX_FLAGS_CSUM = 0x20,

IGC_TX_FLAGS_TSTAMP_1 = 0x100,
IGC_TX_FLAGS_TSTAMP_2 = 0x200,
IGC_TX_FLAGS_TSTAMP_3 = 0x400,
};

enum igc_boards {
Expand Down Expand Up @@ -592,6 +612,7 @@ void igc_ptp_reset(struct igc_adapter *adapter);
void igc_ptp_suspend(struct igc_adapter *adapter);
void igc_ptp_stop(struct igc_adapter *adapter);
ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf);
void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter, u32 mask);
int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
void igc_ptp_tx_hang(struct igc_adapter *adapter);
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ethernet/intel/igc/igc_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ struct igc_adv_tx_context_desc {

/* Adv Transmit Descriptor Config Masks */
#define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */
/* Tx timestamp register selection. REG_3 equals REG_1 | REG_2, so these are
 * values of a two-bit field rather than independent flags.
 * NOTE(review): presumably selects which TXSTMP register latches the
 * timestamp - confirm against the hardware datasheet.
 */
#define IGC_ADVTXD_TSTAMP_REG_1 0x00010000 /* Use Tx timestamp register 1 */
#define IGC_ADVTXD_TSTAMP_REG_2 0x00020000 /* Use Tx timestamp register 2 */
#define IGC_ADVTXD_TSTAMP_REG_3 0x00030000 /* Use Tx timestamp register 3 */
#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
#define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
#define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
Expand Down
7 changes: 7 additions & 0 deletions drivers/net/ethernet/intel/igc/igc_defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,13 +404,20 @@

/* Time Sync Transmit Control bit definitions */
#define IGC_TSYNCTXCTL_TXTT_0 0x00000001 /* Tx timestamp reg 0 valid */
#define IGC_TSYNCTXCTL_TXTT_1 0x00000002 /* Tx timestamp reg 1 valid */
#define IGC_TSYNCTXCTL_TXTT_2 0x00000004 /* Tx timestamp reg 2 valid */
#define IGC_TSYNCTXCTL_TXTT_3 0x00000008 /* Tx timestamp reg 3 valid */
#define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */
#define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */
#define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */
#define IGC_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */
#define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */
#define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */

#define IGC_TSYNCTXCTL_TXTT_ANY ( \
IGC_TSYNCTXCTL_TXTT_0 | IGC_TSYNCTXCTL_TXTT_1 | \
IGC_TSYNCTXCTL_TXTT_2 | IGC_TSYNCTXCTL_TXTT_3)

/* Timer selection bits */
#define IGC_AUX_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for auxiliary time stamp */
#define IGC_AUX_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for auxiliary time stamp */
Expand Down
110 changes: 92 additions & 18 deletions drivers/net/ethernet/intel/igc/igc_main.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Intel Corporation */

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/if_vlan.h>
Expand Down Expand Up @@ -1146,6 +1148,15 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
(IGC_ADVTXD_MAC_TSTAMP));

cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1,
(IGC_ADVTXD_TSTAMP_REG_1));

cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2,
(IGC_ADVTXD_TSTAMP_REG_2));

cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3,
(IGC_ADVTXD_TSTAMP_REG_3));

/* insert frame checksum */
cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);

Expand Down Expand Up @@ -1403,6 +1414,26 @@ static int igc_tso(struct igc_ring *tx_ring,
return 1;
}

/**
 * igc_request_tx_tstamp - claim a free Tx timestamp slot for an skb
 * @adapter: board private structure holding the tx_tstamp[] slots
 * @skb: packet that requested a hardware Tx timestamp
 * @flags: output, receives the Tx flags stored in the claimed slot
 *
 * Scans the slots in order and takes the first one without an skb attached.
 * The slot gets its own reference to @skb (skb_get()) and the current
 * jiffies as start time. @flags is written only when a slot was claimed.
 *
 * The visible caller holds adapter->ptp_tx_lock around this call.
 *
 * Return: true if a slot was claimed, false if all slots are busy.
 */
static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags)
{
	struct igc_tx_timestamp_request *slot = adapter->tx_tstamp;
	struct igc_tx_timestamp_request *end = slot + IGC_MAX_TX_TSTAMP_TIMERS;

	for (; slot < end; slot++) {
		if (!slot->skb) {
			slot->skb = skb_get(skb);
			slot->start = jiffies;
			*flags = slot->flags;

			return true;
		}
	}

	return false;
}

static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
struct igc_ring *tx_ring)
{
Expand Down Expand Up @@ -1438,22 +1469,20 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,

if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
unsigned long flags;
u32 tstamp_flags;

spin_lock_irqsave(&adapter->ptp_tx_lock, flags);

/* FIXME: add support for retrieving timestamps from
* the other timer registers before skipping the
* timestamping request.
*/
if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
!test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
&adapter->state)) {
igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= IGC_TX_FLAGS_TSTAMP;

adapter->ptp_tx_skb = skb_get(skb);
adapter->ptp_tx_start = jiffies;
tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags;
} else {
adapter->tx_hwtstamp_skipped++;
}

spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}

if (skb_vlan_tag_present(skb)) {
Expand Down Expand Up @@ -2284,6 +2313,14 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
xdp_do_flush();
}

/**
 * igc_clean_btf_id - zero the BTF id in the metadata area before a packet
 * @addr: start of the packet data; the metadata struct sits right before it
 *
 * Treats the bytes immediately preceding @addr as a
 * struct xdp_meta_generic___igc and clears its btf_id field. The visible
 * callers use this when BTF metadata is not enabled on the adapter.
 */
void igc_clean_btf_id(void *addr)
{
	struct xdp_meta_generic___igc *meta;

	meta = addr - sizeof(struct xdp_meta_generic___igc);
	meta->btf_id = 0;
}

static void igc_update_rx_stats(struct igc_q_vector *q_vector,
unsigned int packets, unsigned int bytes)
{
Expand Down Expand Up @@ -2347,8 +2384,21 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)

if (!skb) {
xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);

xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
igc_rx_offset(rx_ring) + pkt_offset, size, false);
igc_rx_offset(rx_ring) + pkt_offset, size,
adapter->btf_enabled);

if (adapter->btf_enabled) {
struct xdp_meta_generic___igc *hints;

hints = xdp.data - sizeof(*hints);
xdp.data_meta = hints;
hints->tstamp = timestamp;
hints->btf_id = adapter->btf_id;
} else {
igc_clean_btf_id(xdp.data);
}

skb = igc_xdp_run_prog(adapter, &xdp);
}
Expand Down Expand Up @@ -2512,12 +2562,19 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
bi->xdp->data);

bi->xdp->data += IGC_TS_HDR_LEN;

/* HW timestamp has been copied into local variable. Metadata
* length when XDP program is called should be 0.
*/
bi->xdp->data_meta += IGC_TS_HDR_LEN;
size -= IGC_TS_HDR_LEN;

if (adapter->btf_enabled) {
struct xdp_meta_generic___igc *hints;

hints = bi->xdp->data - sizeof(*hints);
bi->xdp->data_meta = hints;
hints->tstamp = timestamp;
hints->btf_id = adapter->btf_id;
} else {
igc_clean_btf_id(bi->xdp->data);
}
}

bi->xdp->data_end = bi->xdp->data + size;
Expand Down Expand Up @@ -4179,6 +4236,19 @@ static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
return err;
}

/**
 * igc_btf_init - record the BTF object id used for XDP metadata hints
 * @adapter: board private structure; adapter->btf_id is written
 *
 * Takes the BTF object from this module when THIS_MODULE is non-NULL and
 * from vmlinux otherwise, then stores that object's id in the adapter.
 *
 * NOTE(review): the btf pointer is passed to btf_obj_id() unchecked -
 * confirm neither lookup can return NULL or an error pointer.
 */
static void igc_btf_init(struct igc_adapter *adapter)
{
struct module *owner = THIS_MODULE;
struct btf *btf;

if (owner)
btf = btf_get_from_module(owner);
else
btf = bpf_get_btf_vmlinux();

adapter->btf_id = btf_obj_id(btf);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this the btf_id of the whole BTF object for the module or vmlinux? Maybe something like this would get the id of the structure actually used:
adapter->btf_id = btf_find_by_name_kind(btf, "xdp_meta_generic__igc", BTF_KIND_STRUCT);
What do you think?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested this here, and it doesn't really help. I still need both the base (vmlinux) BTF and the module BTF to parse the BTF for the struct, so I'd need to send two ids instead of one. Below is a snippet of the code I ended up testing with:

    base = btf__parse("/sys/kernel/btf/vmlinux", NULL);
    btf = btf__load_from_kernel_by_id_split(66, base); // "66" is the module btf_id
    t = btf__type_by_id(btf, 133298); // "133298" is the xdp_meta_generic btf_id

Copy link
Collaborator

@mswiatko mswiatko Sep 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you try btf_get_type_id from commit 36335fe?

Why do you need the BTF? I thought the type id would be enough in the hints case.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look! The reason I also needed to get the BTF is that the type is, IIUC, relative to the module, so btf__get_from_id(base) fails.

Copy link
Author

@edersondisouza edersondisouza Sep 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err, I mean btf__type_by_id. I took a look at using btf_get_type_id. It gives me the BTF type ID of the xdp_meta_generic struct, but that alone is not useful in user space - I need the BTF ID of the module as well. See Andrii's email to Jesper [1].

So it's kind of pointless to include only the BTF type ID. If we include the BTF ID of the module, it's easy to check whether any type there has a name with the prefix "xdp_meta_generic". Unless I'm missing a way to get the BTF ID of the module - is there one?

[1]https://www.spinics.net/lists/bpf/msg45746.html

}

/**
* igc_sw_init - Initialize general software structures (struct igc_adapter)
* @adapter: board private structure to initialize
Expand Down Expand Up @@ -4232,6 +4302,8 @@ static int igc_sw_init(struct igc_adapter *adapter)

set_bit(__IGC_DOWN, &adapter->state);

igc_btf_init(adapter);

return 0;
}

Expand Down Expand Up @@ -4682,8 +4754,10 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
}

if (tsicr & IGC_TSICR_TXTS) {
/* retrieve hardware timestamp */
schedule_work(&adapter->ptp_tx_work);
u32 tsynctxctl = rd32(IGC_TSYNCTXCTL);;

igc_ptp_tx_hwtstamp(adapter, tsynctxctl & IGC_TSYNCTXCTL_TXTT_ANY);

ack |= IGC_TSICR_TXTS;
}

Expand Down Expand Up @@ -5601,7 +5675,7 @@ static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)

switch (bpf->command) {
case XDP_SETUP_PROG:
return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack, bpf->flags);
case XDP_SETUP_XSK_POOL:
return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
bpf->xsk.queue_id);
Expand Down
Loading