Skip to content

Commit

Permalink
Merge branch 'dpaa2-eth-send-a-scatter-gather-FD-instead-of-realloc-ing'
Browse files Browse the repository at this point in the history
Ioana Ciornei says:

====================
dpaa2-eth: send a scatter-gather FD instead of realloc-ing

This patch set changes the behaviour in case the Tx path is confronted
with an SKB with insufficient headroom for our hardware necessities (SW
annotation area). In the first patch, instead of realloc-ing the SKB we
now send an S/G frame descriptor, while the second one adds a new
software held counter to account for these types of frames.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Jun 30, 2020
2 parents 17af2c4 + 4c96c0a commit 5fb6237
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 32 deletions.
4 changes: 2 additions & 2 deletions drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset)
seq_printf(file, "Per-CPU stats for %s\n", priv->net_dev->name);
seq_printf(file, "%s%16s%16s%16s%16s%16s%16s%16s%16s%16s\n",
"CPU", "Rx", "Rx Err", "Rx SG", "Tx", "Tx Err", "Tx conf",
"Tx SG", "Tx realloc", "Enq busy");
"Tx SG", "Tx converted to SG", "Enq busy");

for_each_online_cpu(i) {
stats = per_cpu_ptr(priv->percpu_stats, i);
Expand All @@ -35,7 +35,7 @@ static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset)
stats->tx_errors,
extras->tx_conf_frames,
extras->tx_sg_frames,
extras->tx_reallocs,
extras->tx_converted_sg_frames,
extras->tx_portal_busy);
}

Expand Down
179 changes: 151 additions & 28 deletions drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,86 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv,
return err;
}

/* Create a SG frame descriptor based on a linear skb.
 *
 * This function is used on the Tx path when the skb headroom is not large
 * enough for the HW requirements, thus instead of realloc-ing the skb we
 * create a SG frame descriptor with only one entry.
 *
 * The SGT buffer is taken from this CPU's SGT cache when one is available,
 * otherwise it is freshly allocated. Its start holds the software annotation
 * (skb backpointer and SGT size) and the single SG entry lives at
 * priv->tx_data_offset.
 *
 * Returns 0 on success, or -ENOMEM if the SGT buffer cannot be allocated or
 * if either DMA mapping fails. On failure the skb is left untouched and the
 * SGT buffer is returned to the cache (or freed if the cache is full).
 */
static int build_sg_fd_single_buf(struct dpaa2_eth_priv *priv,
				  struct sk_buff *skb,
				  struct dpaa2_fd *fd)
{
	struct device *dev = priv->net_dev->dev.parent;
	struct dpaa2_eth_sgt_cache *sgt_cache;
	struct dpaa2_sg_entry *sgt;
	struct dpaa2_eth_swa *swa;
	dma_addr_t addr, sgt_addr;
	void *sgt_buf_raw = NULL;
	void *sgt_buf;
	int sgt_buf_size;
	int err;

	/* Prepare the HW SGT structure */
	sgt_cache = this_cpu_ptr(priv->sgt_cache);
	sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry);

	if (sgt_cache->count == 0)
		sgt_buf_raw = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN,
				      GFP_ATOMIC);
	else
		sgt_buf_raw = sgt_cache->buf[--sgt_cache->count];
	if (unlikely(!sgt_buf_raw))
		return -ENOMEM;

	/* Keep the pointer that was handed out by the allocator/cache: the
	 * error path below must give back that exact pointer, not the
	 * aligned one.
	 */
	sgt_buf = PTR_ALIGN(sgt_buf_raw, DPAA2_ETH_TX_BUF_ALIGN);
	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);

	addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(dev, addr))) {
		err = -ENOMEM;
		goto data_map_failed;
	}

	/* Fill in the HW SGT structure */
	dpaa2_sg_set_addr(sgt, addr);
	dpaa2_sg_set_len(sgt, skb->len);
	dpaa2_sg_set_final(sgt, true);

	/* Store the skb backpointer in the SGT buffer. The annotation type
	 * is SINGLE, so the SGT size must go in the 'single' member of the
	 * union: that is the member read back when the frame is freed.
	 */
	swa = (struct dpaa2_eth_swa *)sgt_buf;
	swa->type = DPAA2_ETH_SWA_SINGLE;
	swa->single.skb = skb;
	swa->single.sgt_size = sgt_buf_size;

	/* Separately map the SGT buffer */
	sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(dev, sgt_addr))) {
		err = -ENOMEM;
		goto sgt_map_failed;
	}

	dpaa2_fd_set_offset(fd, priv->tx_data_offset);
	dpaa2_fd_set_format(fd, dpaa2_fd_sg);
	dpaa2_fd_set_addr(fd, sgt_addr);
	dpaa2_fd_set_len(fd, skb->len);
	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);

	if (priv->tx_tstamp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		enable_tx_tstamp(fd, sgt_buf);

	return 0;

sgt_map_failed:
	dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL);
data_map_failed:
	/* Recycle the buffer if the per-CPU cache has room, else free it */
	if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
		kfree(sgt_buf_raw);
	else
		sgt_cache->buf[sgt_cache->count++] = sgt_buf_raw;

	return err;
}

/* Create a frame descriptor based on a linear skb */
static int build_single_fd(struct dpaa2_eth_priv *priv,
struct sk_buff *skb,
Expand Down Expand Up @@ -743,13 +823,16 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
const struct dpaa2_fd *fd, bool in_napi)
{
struct device *dev = priv->net_dev->dev.parent;
dma_addr_t fd_addr;
dma_addr_t fd_addr, sg_addr;
struct sk_buff *skb = NULL;
unsigned char *buffer_start;
struct dpaa2_eth_swa *swa;
u8 fd_format = dpaa2_fd_get_format(fd);
u32 fd_len = dpaa2_fd_get_len(fd);

struct dpaa2_eth_sgt_cache *sgt_cache;
struct dpaa2_sg_entry *sgt;

fd_addr = dpaa2_fd_get_addr(fd);
buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
swa = (struct dpaa2_eth_swa *)buffer_start;
Expand All @@ -769,16 +852,29 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
DMA_BIDIRECTIONAL);
}
} else if (fd_format == dpaa2_fd_sg) {
skb = swa->sg.skb;
if (swa->type == DPAA2_ETH_SWA_SG) {
skb = swa->sg.skb;

/* Unmap the scatterlist */
dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
DMA_BIDIRECTIONAL);
kfree(swa->sg.scl);

/* Unmap the scatterlist */
dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
DMA_BIDIRECTIONAL);
kfree(swa->sg.scl);
/* Unmap the SGT buffer */
dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
DMA_BIDIRECTIONAL);
} else {
skb = swa->single.skb;

/* Unmap the SGT buffer */
dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
DMA_BIDIRECTIONAL);
/* Unmap the SGT Buffer */
dma_unmap_single(dev, fd_addr, swa->single.sgt_size,
DMA_BIDIRECTIONAL);

sgt = (struct dpaa2_sg_entry *)(buffer_start +
priv->tx_data_offset);
sg_addr = dpaa2_sg_get_addr(sgt);
dma_unmap_single(dev, sg_addr, skb->len, DMA_BIDIRECTIONAL);
}
} else {
netdev_dbg(priv->net_dev, "Invalid FD format\n");
return;
Expand Down Expand Up @@ -808,8 +904,17 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
}

/* Free SGT buffer allocated on tx */
if (fd_format != dpaa2_fd_single)
skb_free_frag(buffer_start);
if (fd_format != dpaa2_fd_single) {
sgt_cache = this_cpu_ptr(priv->sgt_cache);
if (swa->type == DPAA2_ETH_SWA_SG) {
skb_free_frag(buffer_start);
} else {
if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE)
kfree(buffer_start);
else
sgt_cache->buf[sgt_cache->count++] = buffer_start;
}
}

/* Move on with skb release */
napi_consume_skb(skb, in_napi);
Expand All @@ -833,22 +938,6 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
percpu_extras = this_cpu_ptr(priv->percpu_extras);

needed_headroom = dpaa2_eth_needed_headroom(priv, skb);
if (skb_headroom(skb) < needed_headroom) {
struct sk_buff *ns;

ns = skb_realloc_headroom(skb, needed_headroom);
if (unlikely(!ns)) {
percpu_stats->tx_dropped++;
goto err_alloc_headroom;
}
percpu_extras->tx_reallocs++;

if (skb->sk)
skb_set_owner_w(ns, skb->sk);

dev_kfree_skb(skb);
skb = ns;
}

/* We'll be holding a back-reference to the skb until Tx Confirmation;
* we don't want that overwritten by a concurrent Tx with a cloned skb.
Expand All @@ -867,6 +956,12 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
err = build_sg_fd(priv, skb, &fd);
percpu_extras->tx_sg_frames++;
percpu_extras->tx_sg_bytes += skb->len;
} else if (skb_headroom(skb) < needed_headroom) {
err = build_sg_fd_single_buf(priv, skb, &fd);
percpu_extras->tx_sg_frames++;
percpu_extras->tx_sg_bytes += skb->len;
percpu_extras->tx_converted_sg_frames++;
percpu_extras->tx_converted_sg_bytes += skb->len;
} else {
err = build_single_fd(priv, skb, &fd);
}
Expand Down Expand Up @@ -924,7 +1019,6 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
return NETDEV_TX_OK;

err_build_fd:
err_alloc_headroom:
dev_kfree_skb(skb);

return NETDEV_TX_OK;
Expand Down Expand Up @@ -1161,6 +1255,22 @@ static int refill_pool(struct dpaa2_eth_priv *priv,
return 0;
}

/* Free every SGT buffer currently held in the per-CPU SGT caches and reset
 * the caches to empty.
 *
 * All possible CPUs are walked, not only the online ones, so that buffers
 * cached on a CPU that has since gone offline are not leaked. Caches that
 * were never used have a zero count and are skipped by the inner loop.
 */
static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv)
{
	struct dpaa2_eth_sgt_cache *sgt_cache;
	u16 count;
	int k, i;

	for_each_possible_cpu(k) {
		sgt_cache = per_cpu_ptr(priv->sgt_cache, k);
		count = sgt_cache->count;

		for (i = 0; i < count; i++)
			kfree(sgt_cache->buf[i]);
		sgt_cache->count = 0;
	}
}

static int pull_channel(struct dpaa2_eth_channel *ch)
{
int err;
Expand Down Expand Up @@ -1562,6 +1672,9 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
/* Empty the buffer pool */
drain_pool(priv);

/* Empty the Scatter-Gather Buffer cache */
dpaa2_eth_sgt_cache_drain(priv);

return 0;
}

Expand Down Expand Up @@ -3846,6 +3959,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
goto err_alloc_percpu_extras;
}

priv->sgt_cache = alloc_percpu(*priv->sgt_cache);
if (!priv->sgt_cache) {
dev_err(dev, "alloc_percpu(sgt_cache) failed\n");
err = -ENOMEM;
goto err_alloc_sgt_cache;
}

err = netdev_init(net_dev);
if (err)
goto err_netdev_init;
Expand Down Expand Up @@ -3914,6 +4034,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
err_alloc_rings:
err_csum:
err_netdev_init:
free_percpu(priv->sgt_cache);
err_alloc_sgt_cache:
free_percpu(priv->percpu_extras);
err_alloc_percpu_extras:
free_percpu(priv->percpu_stats);
Expand Down Expand Up @@ -3959,6 +4081,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
fsl_mc_free_irqs(ls_dev);

free_rings(priv);
free_percpu(priv->sgt_cache);
free_percpu(priv->percpu_stats);
free_percpu(priv->percpu_extras);

Expand Down
12 changes: 11 additions & 1 deletion drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ struct dpaa2_eth_swa {
union {
struct {
struct sk_buff *skb;
int sgt_size;
} single;
struct {
struct sk_buff *skb;
Expand Down Expand Up @@ -282,9 +283,11 @@ struct dpaa2_eth_drv_stats {
__u64 tx_conf_bytes;
__u64 tx_sg_frames;
__u64 tx_sg_bytes;
__u64 tx_reallocs;
__u64 rx_sg_frames;
__u64 rx_sg_bytes;
/* Linear skbs sent as a S/G FD due to insufficient headroom */
__u64 tx_converted_sg_frames;
__u64 tx_converted_sg_bytes;
/* Enqueues retried due to portal busy */
__u64 tx_portal_busy;
};
Expand Down Expand Up @@ -395,6 +398,12 @@ struct dpaa2_eth_cls_rule {
u8 in_use;
};

/* Maximum number of SGT buffers that one cache instance can hold */
#define DPAA2_ETH_SGT_CACHE_SIZE 256
/* Cache of SGT buffers kept for reuse on the Tx path.
 * One instance exists per CPU (allocated with alloc_percpu() at probe).
 * It is used as a stack: buffers are taken from and returned to the top.
 */
struct dpaa2_eth_sgt_cache {
/* Cached buffer pointers; entries [0, count) are valid */
void *buf[DPAA2_ETH_SGT_CACHE_SIZE];
/* Number of buffers currently stored in buf[] */
u16 count;
};

/* Driver private data */
struct dpaa2_eth_priv {
struct net_device *net_dev;
Expand All @@ -409,6 +418,7 @@ struct dpaa2_eth_priv {

u8 num_channels;
struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS];
struct dpaa2_eth_sgt_cache __percpu *sgt_cache;

struct dpni_attr dpni_attrs;
u16 dpni_ver_major;
Expand Down
3 changes: 2 additions & 1 deletion drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
"[drv] tx conf bytes",
"[drv] tx sg frames",
"[drv] tx sg bytes",
"[drv] tx realloc frames",
"[drv] rx sg frames",
"[drv] rx sg bytes",
"[drv] tx converted sg frames",
"[drv] tx converted sg bytes",
"[drv] enqueue portal busy",
/* Channel stats */
"[drv] dequeue portal busy",
Expand Down

0 comments on commit 5fb6237

Please sign in to comment.