Merge branch 'dpaa2-eth-Introduce-XDP-support'

Ioana Ciocoi Radulescu says:

====================
dpaa2-eth: Introduce XDP support

Add support for XDP programs. Only XDP_PASS, XDP_DROP and XDP_TX
actions are supported for now. Frame header changes are also
allowed.

v2: - count the XDP packets in the rx/tx interface stats
    - add message with the maximum supported MTU value for XDP
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-11-28 10:57:46 -08:00
commit 5827541067
3 changed files with 350 additions and 38 deletions

View File

@ -13,7 +13,8 @@
#include <linux/iommu.h>
#include <linux/net_tstamp.h>
#include <linux/fsl/mc.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/sock.h>
#include "dpaa2-eth.h"
@ -86,7 +87,7 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
addr = dpaa2_sg_get_addr(&sgt[i]);
sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
DMA_FROM_DEVICE);
DMA_BIDIRECTIONAL);
skb_free_frag(sg_vaddr);
if (dpaa2_sg_is_final(&sgt[i]))
@ -144,7 +145,7 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
sg_addr = dpaa2_sg_get_addr(sge);
sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
dma_unmap_single(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
DMA_FROM_DEVICE);
DMA_BIDIRECTIONAL);
sg_length = dpaa2_sg_get_len(sge);
@ -199,6 +200,141 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
return skb;
}
/* Unmap and free a batch of Rx buffers identified by their DMA addresses.
 * Meant for buffers that were acquired from the buffer pool, or that were
 * supposed to be released into it.
 */
static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
{
	struct device *dev = priv->net_dev->dev.parent;
	int idx;

	for (idx = 0; idx < count; idx++) {
		u64 iova = buf_array[idx];
		void *buf = dpaa2_iova_to_virt(priv->iommu_domain, iova);

		dma_unmap_single(dev, iova, DPAA2_ETH_RX_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		skb_free_frag(buf);
	}
}
/* Queue the buffer at @addr for release into the buffer pool.
 * Addresses are collected per channel in ch->xdp.drop_bufs and are only
 * handed over in a single release command once DPAA2_ETH_BUFS_PER_CMD of
 * them have been gathered.
 */
static void xdp_release_buf(struct dpaa2_eth_priv *priv,
			    struct dpaa2_eth_channel *ch,
			    dma_addr_t addr)
{
	int err;

	ch->xdp.drop_bufs[ch->xdp.drop_cnt] = addr;
	ch->xdp.drop_cnt++;
	if (ch->xdp.drop_cnt < DPAA2_ETH_BUFS_PER_CMD)
		return;

	/* Batch is full: retry for as long as the release reports -EBUSY */
	for (;;) {
		err = dpaa2_io_service_release(ch->dpio, priv->bpid,
					       ch->xdp.drop_bufs,
					       ch->xdp.drop_cnt);
		if (err != -EBUSY)
			break;
		cpu_relax();
	}

	if (err) {
		/* The release failed for good: free the buffers ourselves
		 * and take them out of the channel's buffer count
		 */
		free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
		ch->buf_count -= ch->xdp.drop_cnt;
	}

	ch->xdp.drop_cnt = 0;
}
/* Enqueue a frame descriptor for transmission on the Tx queue selected by
 * @queue_id. @buf_start is the start of the frame buffer, where the
 * hardware annotation area is located.
 *
 * Returns the result of the last enqueue attempt; the enqueue is retried
 * up to DPAA2_ETH_ENQUEUE_RETRIES times while it reports -EBUSY.
 */
static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
		       void *buf_start, u16 queue_id)
{
	struct dpaa2_eth_fq *fq = &priv->fq[queue_id];
	struct dpaa2_faead *faead;
	int attempt, err;

	/* Flag the egress frame hardware annotation area as valid */
	dpaa2_fd_set_frc(fd, dpaa2_fd_get_frc(fd) | DPAA2_FD_FRC_FAEADV);
	dpaa2_fd_set_ctrl(fd, DPAA2_FD_CTRL_ASAL);

	/* Have hardware put the FD buffer straight back into the buffer
	 * pool when transmission completes, rather than sending us a
	 * Tx confirmation frame
	 */
	faead = dpaa2_get_faead(buf_start, false);
	faead->ctrl = cpu_to_le32(DPAA2_FAEAD_A4V | DPAA2_FAEAD_A2V |
				  DPAA2_FAEAD_EBDDV);
	faead->conf_fqid = 0;

	for (attempt = 0; attempt < DPAA2_ETH_ENQUEUE_RETRIES; attempt++) {
		err = dpaa2_io_service_enqueue_qd(fq->channel->dpio,
						  priv->tx_qdid, 0,
						  fq->tx_qdbin, fd);
		if (err != -EBUSY)
			break;
	}

	return err;
}
/* Run the XDP program attached to the channel (if any) on a received frame
 * and act on its verdict.
 *
 * @fd is the frame descriptor, @vaddr the virtual address of its buffer.
 * The descriptor's offset and length are rewritten after the program runs.
 *
 * Returns the XDP action. XDP_PASS is returned untouched when no program is
 * attached. For every other action the buffer has already been dealt with
 * here: enqueued for transmission, or handed to xdp_release_buf().
 */
static u32 run_xdp(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
struct dpaa2_eth_fq *rx_fq,
struct dpaa2_fd *fd, void *vaddr)
{
dma_addr_t addr = dpaa2_fd_get_addr(fd);
struct rtnl_link_stats64 *percpu_stats;
struct bpf_prog *xdp_prog;
struct xdp_buff xdp;
u32 xdp_act = XDP_PASS;
int err;
percpu_stats = this_cpu_ptr(priv->percpu_stats);
rcu_read_lock();
xdp_prog = READ_ONCE(ch->xdp.prog);
if (!xdp_prog)
goto out;
/* Describe the frame to the program: payload bounds come from the frame
 * descriptor, and XDP_PACKET_HEADROOM bytes in front of the data are
 * exposed as headroom.
 * NOTE(review): only data, data_end, data_hard_start and data_meta are
 * initialized here - confirm no other xdp_buff field (e.g. rxq) can be
 * read by the programs run on this path.
 */
xdp.data = vaddr + dpaa2_fd_get_offset(fd);
xdp.data_end = xdp.data + dpaa2_fd_get_len(fd);
xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
xdp_set_data_meta_invalid(&xdp);
xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
/* xdp.data pointer may have changed; write the resulting offset and
 * length back into the frame descriptor
 */
dpaa2_fd_set_offset(fd, xdp.data - vaddr);
dpaa2_fd_set_len(fd, xdp.data_end - xdp.data);
switch (xdp_act) {
case XDP_PASS:
break;
case XDP_TX:
err = xdp_enqueue(priv, fd, vaddr, rx_fq->flowid);
if (err) {
/* Enqueue failed, so the buffer is still ours to release */
xdp_release_buf(priv, ch, addr);
percpu_stats->tx_errors++;
ch->stats.xdp_tx_err++;
} else {
percpu_stats->tx_packets++;
percpu_stats->tx_bytes += dpaa2_fd_get_len(fd);
ch->stats.xdp_tx++;
}
break;
default:
bpf_warn_invalid_xdp_action(xdp_act);
/* fall through */
case XDP_ABORTED:
trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
/* fall through */
case XDP_DROP:
xdp_release_buf(priv, ch, addr);
ch->stats.xdp_drop++;
break;
}
out:
rcu_read_unlock();
return xdp_act;
}
/* Main Rx frame processing routine */
static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
struct dpaa2_eth_channel *ch,
@ -215,12 +351,14 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
struct dpaa2_fas *fas;
void *buf_data;
u32 status = 0;
u32 xdp_act;
/* Tracing point */
trace_dpaa2_rx_fd(priv->net_dev, fd);
vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE, DMA_FROM_DEVICE);
dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
DMA_BIDIRECTIONAL);
fas = dpaa2_get_fas(vaddr, false);
prefetch(fas);
@ -231,8 +369,21 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
percpu_extras = this_cpu_ptr(priv->percpu_extras);
if (fd_format == dpaa2_fd_single) {
xdp_act = run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr);
if (xdp_act != XDP_PASS) {
percpu_stats->rx_packets++;
percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
return;
}
dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
DMA_BIDIRECTIONAL);
skb = build_linear_skb(ch, fd, vaddr);
} else if (fd_format == dpaa2_fd_sg) {
WARN_ON(priv->xdp_prog);
dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
DMA_BIDIRECTIONAL);
skb = build_frag_skb(priv, ch, buf_data);
skb_free_frag(vaddr);
percpu_extras->rx_sg_frames++;
@ -319,7 +470,6 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
return 0;
fq->stats.frames += cleaned;
ch->stats.frames += cleaned;
/* A dequeue operation only pulls frames from a single queue
* into the store. Return the frame queue as an out param.
@ -743,23 +893,6 @@ static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
return 0;
}
/* Free buffers acquired from the buffer pool or which were meant to
* be released in the pool
*/
static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
{
struct device *dev = priv->net_dev->dev.parent;
void *vaddr;
int i;
for (i = 0; i < count; i++) {
vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
dma_unmap_single(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
DMA_FROM_DEVICE);
skb_free_frag(vaddr);
}
}
/* Perform a single release command to add buffers
* to the specified buffer pool
*/
@ -783,7 +916,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
buf = PTR_ALIGN(buf, priv->rx_buf_align);
addr = dma_map_single(dev, buf, DPAA2_ETH_RX_BUF_SIZE,
DMA_FROM_DEVICE);
DMA_BIDIRECTIONAL);
if (unlikely(dma_mapping_error(dev, addr)))
goto err_map;
@ -1427,6 +1560,174 @@ static int dpaa2_eth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return -EINVAL;
}
/* Tell whether @mtu can be used while an XDP program is attached.
 * The L2 frame length derived from the MTU must not exceed what is left of
 * an Rx buffer after the hardware annotation area, the regular Rx headroom
 * and XDP_PACKET_HEADROOM are subtracted. When it does, the largest usable
 * MTU is logged and false is returned.
 */
static bool xdp_mtu_valid(struct dpaa2_eth_priv *priv, int mtu)
{
	int frame_len = DPAA2_ETH_L2_MAX_FRM(mtu);
	int max_linear_len;

	max_linear_len = DPAA2_ETH_RX_BUF_SIZE - DPAA2_ETH_RX_HWA_SIZE -
			 dpaa2_eth_rx_head_room(priv) - XDP_PACKET_HEADROOM;

	if (frame_len <= max_linear_len)
		return true;

	netdev_warn(priv->net_dev, "Maximum MTU for XDP is %d\n",
		    max_linear_len - VLAN_ETH_HLEN);
	return false;
}
/* Program the maximum Rx frame length into the DPNI.
 *
 * With an XDP program attached (@has_xdp) the limit is derived from @mtu,
 * in order to avoid Rx S/G frames. Without one, every frame up to the
 * maximum size supported in hardware (DPAA2_ETH_MFL) is accepted.
 *
 * Returns 0 on success or the error reported by
 * dpni_set_max_frame_length().
 */
static int set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp)
{
	int mfl = has_xdp ? DPAA2_ETH_L2_MAX_FRM(mtu) : DPAA2_ETH_MFL;
	int err;

	err = dpni_set_max_frame_length(priv->mc_io, 0, priv->mc_token, mfl);
	if (err)
		netdev_err(priv->net_dev, "dpni_set_max_frame_length failed\n");

	return err;
}
/* ndo_change_mtu handler.
 * Without an XDP program the new MTU is simply recorded. With one attached,
 * the MTU must first pass xdp_mtu_valid() and the hardware maximum Rx frame
 * length is updated to match before the MTU is recorded.
 *
 * Returns 0 on success, -EINVAL if the MTU is too large for XDP, or the
 * error returned by set_rx_mfl().
 */
static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu)
{
	struct dpaa2_eth_priv *priv = netdev_priv(dev);

	if (priv->xdp_prog) {
		int err;

		if (!xdp_mtu_valid(priv, new_mtu))
			return -EINVAL;

		err = set_rx_mfl(priv, new_mtu, true);
		if (err)
			return err;
	}

	dev->mtu = new_mtu;
	return 0;
}
/* Reprogram the Rx buffer layout's data headroom.
 * The current layout is read back first, then only the headroom option is
 * written: the regular Rx headroom, plus XDP_PACKET_HEADROOM when @has_xdp
 * is set.
 *
 * Returns 0 on success or the error reported by the failing dpni call.
 */
static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp)
{
	struct dpni_buffer_layout layout = {0};
	int err;

	err = dpni_get_buffer_layout(priv->mc_io, 0, priv->mc_token,
				     DPNI_QUEUE_RX, &layout);
	if (err) {
		netdev_err(priv->net_dev, "dpni_get_buffer_layout failed\n");
		return err;
	}

	/* Leave room in front of the frame for XDP header size changes */
	layout.data_head_room = dpaa2_eth_rx_head_room(priv);
	if (has_xdp)
		layout.data_head_room += XDP_PACKET_HEADROOM;
	layout.options = DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM;

	err = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token,
				     DPNI_QUEUE_RX, &layout);
	if (err)
		netdev_err(priv->net_dev, "dpni_set_buffer_layout failed\n");

	return err;
}
/* Attach @prog as the interface's XDP program, or detach the current one
 * when @prog is NULL.
 *
 * A running interface is stopped for the duration of the change and
 * reopened afterwards. The hardware is only reconfigured (max Rx frame
 * length, Rx buffer headroom) when switching between "has a program" and
 * "has no program".
 *
 * Returns 0 on success, -EINVAL if the current MTU is too large for XDP, or
 * the error from bpf_prog_add(), the hardware reconfiguration or
 * dpaa2_eth_open().
 */
static int setup_xdp(struct net_device *dev, struct bpf_prog *prog)
{
struct dpaa2_eth_priv *priv = netdev_priv(dev);
struct dpaa2_eth_channel *ch;
struct bpf_prog *old;
bool up, need_update;
int i, err;
if (prog && !xdp_mtu_valid(priv, dev->mtu))
return -EINVAL;
/* Take one extra reference per channel, matching the per-channel pointers
 * stored below. The reference backing priv->xdp_prog is presumably the
 * one the caller already holds - TODO confirm against the ndo_bpf contract.
 */
if (prog) {
prog = bpf_prog_add(prog, priv->num_channels);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
up = netif_running(dev);
/* True only when going from no program to a program or the other way */
need_update = (!!priv->xdp_prog != !!prog);
if (up)
dpaa2_eth_stop(dev);
/* While in xdp mode, enforce a maximum Rx frame size based on MTU.
 * Also, when switching between xdp/non-xdp modes we need to reconfigure
 * our Rx buffer layout. Buffer pool was drained on dpaa2_eth_stop,
 * so we are sure no old format buffers will be used from now on.
 */
if (need_update) {
err = set_rx_mfl(priv, dev->mtu, !!prog);
if (err)
goto out_err;
/* NOTE(review): if this second step fails, the max frame length
 * programmed just above is not restored on the out_err path.
 */
err = update_rx_buffer_headroom(priv, !!prog);
if (err)
goto out_err;
}
/* Swap in the new program and drop the reference on the previous one,
 * first for the interface-wide pointer, then for every channel.
 */
old = xchg(&priv->xdp_prog, prog);
if (old)
bpf_prog_put(old);
for (i = 0; i < priv->num_channels; i++) {
ch = priv->channel[i];
old = xchg(&ch->xdp.prog, prog);
if (old)
bpf_prog_put(old);
}
/* If reopening fails the new program stays installed and the error is
 * returned to the caller.
 */
if (up) {
err = dpaa2_eth_open(dev);
if (err)
return err;
}
return 0;
out_err:
/* Give back the per-channel references taken at the top */
if (prog)
bpf_prog_sub(prog, priv->num_channels);
/* Best effort: the result of reopening the interface is not checked */
if (up)
dpaa2_eth_open(dev);
return err;
}
/* ndo_bpf handler.
 * XDP_SETUP_PROG is forwarded to setup_xdp(). XDP_QUERY_PROG reports the id
 * of the attached program, or 0 when there is none. Any other command is
 * rejected with -EINVAL.
 */
static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct dpaa2_eth_priv *priv = netdev_priv(dev);

	if (xdp->command == XDP_SETUP_PROG)
		return setup_xdp(dev, xdp->prog);

	if (xdp->command != XDP_QUERY_PROG)
		return -EINVAL;

	xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
	return 0;
}
static const struct net_device_ops dpaa2_eth_ops = {
.ndo_open = dpaa2_eth_open,
.ndo_start_xmit = dpaa2_eth_tx,
@ -1436,6 +1737,8 @@ static const struct net_device_ops dpaa2_eth_ops = {
.ndo_set_rx_mode = dpaa2_eth_set_rx_mode,
.ndo_set_features = dpaa2_eth_set_features,
.ndo_do_ioctl = dpaa2_eth_ioctl,
.ndo_change_mtu = dpaa2_eth_change_mtu,
.ndo_bpf = dpaa2_eth_xdp,
};
static void cdan_cb(struct dpaa2_io_notification_ctx *ctx)

View File

@ -139,7 +139,9 @@ struct dpaa2_faead {
};
#define DPAA2_FAEAD_A2V 0x20000000
#define DPAA2_FAEAD_A4V 0x08000000
#define DPAA2_FAEAD_UPDV 0x00001000
#define DPAA2_FAEAD_EBDDV 0x00002000
#define DPAA2_FAEAD_UPD 0x00000010
/* Accessors for the hardware annotation fields that we use */
@ -243,12 +245,14 @@ struct dpaa2_eth_fq_stats {
struct dpaa2_eth_ch_stats {
/* Volatile dequeues retried due to portal busy */
__u64 dequeue_portal_busy;
/* Number of CDANs; useful to estimate avg NAPI len */
__u64 cdan;
/* Number of frames received on queues from this channel */
__u64 frames;
/* Pull errors */
__u64 pull_err;
/* Number of CDANs; useful to estimate avg NAPI len */
__u64 cdan;
/* XDP counters */
__u64 xdp_drop;
__u64 xdp_tx;
__u64 xdp_tx_err;
};
/* Maximum number of queues associated with a DPNI */
@ -283,6 +287,12 @@ struct dpaa2_eth_fq {
struct dpaa2_eth_fq_stats stats;
};
/* Per-channel XDP state */
struct dpaa2_eth_ch_xdp {
/* XDP program run on frames received on this channel; NULL if none */
struct bpf_prog *prog;
/* DMA addresses of buffers waiting to be released into the buffer pool;
 * flushed once DPAA2_ETH_BUFS_PER_CMD entries have accumulated
 */
u64 drop_bufs[DPAA2_ETH_BUFS_PER_CMD];
/* Number of valid entries currently held in drop_bufs */
int drop_cnt;
};
struct dpaa2_eth_channel {
struct dpaa2_io_notification_ctx nctx;
struct fsl_mc_device *dpcon;
@ -294,6 +304,7 @@ struct dpaa2_eth_channel {
struct dpaa2_eth_priv *priv;
int buf_count;
struct dpaa2_eth_ch_stats stats;
struct dpaa2_eth_ch_xdp xdp;
};
struct dpaa2_eth_dist_fields {
@ -353,6 +364,7 @@ struct dpaa2_eth_priv {
u64 rx_hash_fields;
struct dpaa2_eth_cls_rule *cls_rules;
u8 rx_cls_enabled;
struct bpf_prog *xdp_prog;
};
#define DPAA2_RXH_SUPPORTED (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \

View File

@ -45,6 +45,9 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
"[drv] dequeue portal busy",
"[drv] channel pull errors",
"[drv] cdan",
"[drv] xdp drop",
"[drv] xdp tx",
"[drv] xdp tx errors",
};
#define DPAA2_ETH_NUM_EXTRA_STATS ARRAY_SIZE(dpaa2_ethtool_extras)
@ -174,8 +177,6 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
int j, k, err;
int num_cnt;
union dpni_statistics dpni_stats;
u64 cdan = 0;
u64 portal_busy = 0, pull_err = 0;
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
struct dpaa2_eth_drv_stats *extras;
struct dpaa2_eth_ch_stats *ch_stats;
@ -212,16 +213,12 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
}
i += j;
for (j = 0; j < priv->num_channels; j++) {
ch_stats = &priv->channel[j]->stats;
cdan += ch_stats->cdan;
portal_busy += ch_stats->dequeue_portal_busy;
pull_err += ch_stats->pull_err;
/* Per-channel stats */
for (k = 0; k < priv->num_channels; k++) {
ch_stats = &priv->channel[k]->stats;
for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64); j++)
*((__u64 *)data + i + j) += *((__u64 *)ch_stats + j);
}
*(data + i++) = portal_busy;
*(data + i++) = pull_err;
*(data + i++) = cdan;
}
static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,