linux_dsm_epyc7002/net/xfrm/xfrm_device.c
Florian Westphal ec30d78c14 xfrm: add xdst pcpu cache
retain last used xfrm_dst in a pcpu cache.
On next request, reuse this dst if the policies are the same.

The cache will not help with strict RR workloads as there is no hit.

The cache packet-path part is reasonably small, the notifier part is
needed so we do not add long hangs when a device is dismantled but some
pcpu xdst still holds a reference, there are also calls to the flush
operation when userspace deletes SAs so modules can be removed
(there is no hit.

We need to run the dst_release on the correct cpu to avoid races with
packet path.  This is done by adding a work_struct for each cpu and then
doing the actual test/release on each affected cpu via schedule_work_on().

Test results using 4 network namespaces and null encryption:

ns1           ns2          -> ns3           -> ns4
netperf -> xfrm/null enc   -> xfrm/null dec -> netserver

what                    TCP_STREAM      UDP_STREAM      UDP_RR
Flow cache:             14644.61        294.35          327231.64
No flow cache:		14349.81	242.64		202301.72
Pcpu cache:		14629.70	292.21		205595.22

UDP tests used 64byte packets, tests ran for one minute each,
value is average over ten iterations.

'Flow cache' is 'net-next', 'No flow cache' is net-next plus this
series but without this patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-07-18 11:13:41 -07:00

211 lines
4.6 KiB
C

/*
* xfrm_device.c - IPsec device offloading code.
*
* Copyright (c) 2015 secunet Security Networks AG
*
* Author:
* Steffen Klassert <steffen.klassert@secunet.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <linux/notifier.h>
#ifdef CONFIG_XFRM_OFFLOAD
int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
{
int err;
struct xfrm_state *x;
struct xfrm_offload *xo = xfrm_offload(skb);
if (skb_is_gso(skb))
return 0;
if (xo) {
x = skb->sp->xvec[skb->sp->len - 1];
if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
return 0;
x->outer_mode->xmit(x, skb);
err = x->type_offload->xmit(x, skb, features);
if (err) {
XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
return err;
}
skb_push(skb, skb->data - skb_mac_header(skb));
}
return 0;
}
EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo)
{
int err;
struct dst_entry *dst;
struct net_device *dev;
struct xfrm_state_offload *xso = &x->xso;
xfrm_address_t *saddr;
xfrm_address_t *daddr;
if (!x->type_offload)
return 0;
/* We don't yet support UDP encapsulation, TFC padding and ESN. */
if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
return 0;
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
saddr = &x->props.saddr;
daddr = &x->id.daddr;
} else {
saddr = &x->id.daddr;
daddr = &x->props.saddr;
}
dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
if (IS_ERR(dst))
return 0;
dev = dst->dev;
dev_hold(dev);
dst_release(dst);
}
if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
dev_put(dev);
return 0;
}
xso->dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
dev_put(dev);
return err;
}
return 0;
}
EXPORT_SYMBOL_GPL(xfrm_dev_state_add);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
{
int mtu;
struct dst_entry *dst = skb_dst(skb);
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct net_device *dev = x->xso.dev;
if (!x->type_offload || x->encap)
return false;
if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
!dst->child->xfrm && x->type->get_mtu) {
mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
goto ok;
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
goto ok;
}
return false;
ok:
if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok)
return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x);
return true;
}
EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
#endif
static int xfrm_dev_register(struct net_device *dev)
{
if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
return NOTIFY_BAD;
if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
!(dev->features & NETIF_F_HW_ESP))
return NOTIFY_BAD;
return NOTIFY_DONE;
}
static int xfrm_dev_unregister(struct net_device *dev)
{
xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
static int xfrm_dev_feat_change(struct net_device *dev)
{
if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
return NOTIFY_BAD;
else if (!(dev->features & NETIF_F_HW_ESP))
dev->xfrmdev_ops = NULL;
if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
!(dev->features & NETIF_F_HW_ESP))
return NOTIFY_BAD;
return NOTIFY_DONE;
}
static int xfrm_dev_down(struct net_device *dev)
{
if (dev->features & NETIF_F_HW_ESP)
xfrm_dev_state_flush(dev_net(dev), dev, true);
xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
case NETDEV_REGISTER:
return xfrm_dev_register(dev);
case NETDEV_UNREGISTER:
return xfrm_dev_unregister(dev);
case NETDEV_FEAT_CHANGE:
return xfrm_dev_feat_change(dev);
case NETDEV_DOWN:
return xfrm_dev_down(dev);
}
return NOTIFY_DONE;
}
static struct notifier_block xfrm_dev_notifier = {
.notifier_call = xfrm_dev_event,
};
void __net_init xfrm_dev_init(void)
{
register_netdevice_notifier(&xfrm_dev_notifier);
}