linux_dsm_epyc7002/drivers/net/hyperv/netvsc_bpf.c
Jesper Dangaard Brouer 7358877ac1 hv_netvsc: Add XDP frame size to driver
The hyperv NIC driver does memory allocation and copy even without XDP.
In XDP mode it will allocate a new page for each packet and copy over
the payload, before invoking the XDP BPF-prog.

The positive thing is that it's easy to determine xdp.frame_sz.
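Since every packet is copied into its own freshly allocated page, the
driver can set it unconditionally (as the code below does):

	xdp->frame_sz = PAGE_SIZE;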

The XDP implementation for hv_netvsc transparently passes xdp_prog
to the associated VF NIC. Many of the Azure VMs are using SRIOV, so
the majority of the data is actually processed directly on the VF
driver's XDP path. Thus, the overhead of the synthetic data path
(hv_netvsc) is minimal.

When XDP is enabled on this driver, XDP_PASS and XDP_TX will create the
SKB via build_skb (based on the newly allocated page). Using the XDP
frame_sz, this now provides more skb_tailroom, which the netstack can
use for SKB coalescing (e.g. tcp_try_coalesce -> skb_try_coalesce), as
sketched below.
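For illustration, a rough sketch of how the receive path can consume
frame_sz when building the SKB (the corresponding logic lives in
netvsc_drv.c; this is an outline under those assumptions, not the
driver's literal code):

	/* xdp->data_hard_start is the page filled by netvsc_run_xdp() */
	unsigned int hdroom = xdp->data - xdp->data_hard_start;
	unsigned int xlen = xdp->data_end - xdp->data;
	struct sk_buff *skb = build_skb(xdp->data_hard_start, xdp->frame_sz);

	if (!skb)
		return NULL;	/* error handling elided in this sketch */
	skb_reserve(skb, hdroom);
	skb_put(skb, xlen);
	/* tailroom now spans the rest of the page:
	 * frame_sz - hdroom - xlen - SKB_DATA_ALIGN(skb_shared_info)
	 */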

V3: Adjust patch desc to be more positive.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Link: https://lore.kernel.org/bpf/158945339857.97035.10212138582505736163.stgit@firesoul
2020-05-14 21:21:54 -07:00

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019, Microsoft Corporation.
 *
 * Author:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kernel.h>
#include <net/xdp.h>

#include <linux/mutex.h>
#include <linux/rtnetlink.h>

#include "hyperv_net.h"

u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
		   struct xdp_buff *xdp)
{
	void *data = nvchan->rsc.data[0];
	u32 len = nvchan->rsc.len[0];
	struct page *page = NULL;
	struct bpf_prog *prog;
	u32 act = XDP_PASS;

	xdp->data_hard_start = NULL;

	rcu_read_lock();
	prog = rcu_dereference(nvchan->bpf_prog);

	if (!prog)
		goto out;

	/* allocate page buffer for data */
	page = alloc_page(GFP_ATOMIC);
	if (!page) {
		act = XDP_DROP;
		goto out;
	}

	xdp->data_hard_start = page_address(page);
	xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM;
	xdp_set_data_meta_invalid(xdp);
	xdp->data_end = xdp->data + len;
	xdp->rxq = &nvchan->xdp_rxq;
	xdp->frame_sz = PAGE_SIZE;

	memcpy(xdp->data, data, len);

	act = bpf_prog_run_xdp(prog, xdp);

	switch (act) {
	case XDP_PASS:
	case XDP_TX:
	case XDP_DROP:
		break;

	case XDP_ABORTED:
		trace_xdp_exception(ndev, prog, act);
		break;

	default:
		bpf_warn_invalid_xdp_action(act);
	}

out:
	rcu_read_unlock();

	if (page && act != XDP_PASS && act != XDP_TX) {
		__free_page(page);
		xdp->data_hard_start = NULL;
	}

	return act;
}

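/* netvsc_xdp_fraglen - buffer size needed for @len payload bytes plus
 * the trailing skb_shared_info that build_skb() expects, both aligned
 * via SKB_DATA_ALIGN(). Used to verify that headroom plus a full MTU
 * frame still fits in a single page.
 */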
unsigned int netvsc_xdp_fraglen(unsigned int len)
{
	return SKB_DATA_ALIGN(len) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}

struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
{
	return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
}

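/* netvsc_xdp_set - attach (or detach, when @prog is NULL) an XDP program
 * on every channel of the synthetic device. The program refcount is
 * bumped once per additional channel so each chan_table slot holds its
 * own reference; the old program is released the same number of times.
 */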
int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
		   struct netlink_ext_ack *extack,
		   struct netvsc_device *nvdev)
{
	struct bpf_prog *old_prog;
	int buf_max, i;

	old_prog = netvsc_xdp_get(nvdev);

	if (!old_prog && !prog)
		return 0;

	buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
	if (prog && buf_max > PAGE_SIZE) {
		netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n",
			   dev->mtu, buf_max);
		NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");

		return -EOPNOTSUPP;
	}

	if (prog && (dev->features & NETIF_F_LRO)) {
		netdev_err(dev, "XDP: not support LRO\n");
		NL_SET_ERR_MSG_MOD(extack, "XDP: not support LRO");

		return -EOPNOTSUPP;
	}

	if (prog)
		bpf_prog_add(prog, nvdev->num_chn - 1);

	for (i = 0; i < nvdev->num_chn; i++)
		rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);

	if (old_prog)
		for (i = 0; i < nvdev->num_chn; i++)
			bpf_prog_put(old_prog);

	return 0;
}

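/* netvsc_vf_setxdp - mirror the synthetic device's XDP program onto the
 * paired SR-IOV VF netdev so packets taking the VF fast path see the
 * same program. Takes an extra reference on @prog before handing it to
 * the VF's ndo_bpf() and drops it again if the setup fails.
 */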
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
{
	struct netdev_bpf xdp;
	bpf_op_t ndo_bpf;
	int ret;

	ASSERT_RTNL();

	if (!vf_netdev)
		return 0;

	ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
	if (!ndo_bpf)
		return 0;

	memset(&xdp, 0, sizeof(xdp));

	if (prog)
		bpf_prog_inc(prog);

	xdp.command = XDP_SETUP_PROG;
	xdp.prog = prog;

	ret = ndo_bpf(vf_netdev, &xdp);

	if (ret && prog)
		bpf_prog_put(prog);

	return ret;
}

static u32 netvsc_xdp_query(struct netvsc_device *nvdev)
{
	struct bpf_prog *prog = netvsc_xdp_get(nvdev);

	if (prog)
		return prog->aux->id;

	return 0;
}

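/* netvsc_bpf - ndo_bpf entry point. Installs the program on the
 * synthetic device first and then propagates it to the VF; if the VF
 * rejects it, the synthetic side is rolled back so both paths stay in
 * sync. XDP_QUERY_PROG must succeed even while the device is being
 * removed.
 */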
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
	struct net_device_context *ndevctx = netdev_priv(dev);
	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
	struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
	struct netlink_ext_ack *extack = bpf->extack;
	int ret;

	if (!nvdev || nvdev->destroy) {
		if (bpf->command == XDP_QUERY_PROG) {
			bpf->prog_id = 0;
			return 0; /* Query must always succeed */
		} else {
			return -ENODEV;
		}
	}

	switch (bpf->command) {
	case XDP_SETUP_PROG:
		ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev);
		if (ret)
			return ret;

		ret = netvsc_vf_setxdp(vf_netdev, bpf->prog);
		if (ret) {
			netdev_err(dev, "vf_setxdp failed:%d\n", ret);
			NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed");

			netvsc_xdp_set(dev, NULL, extack, nvdev);
		}

		return ret;

	case XDP_QUERY_PROG:
		bpf->prog_id = netvsc_xdp_query(nvdev);
		return 0;

	default:
		return -EINVAL;
	}
}