linux_dsm_epyc7002/net/ipv6/fib6_rules.c
Brian Vazquez 55cced4f81 ipv6: fib6: avoid indirect calls from fib6_rule_lookup
It was reported that a considerable number of cycles was spent on the
expensive indirect calls in fib6_rule_lookup. This patch introduces an
inline helper called pol_lookup_func that uses the indirect call wrappers
to avoid the indirect calls.

This patch saves around 50ns per call.

Performance was measured on the receiver by checking the number of
syncookies that the server was able to generate under a synflood load.

Traffic was generated using trafgen[1] which was pushing around 1Mpps on
a single queue. The receiver was using only one rx queue, which helps to
create a bottleneck and makes the experiment rx-bound.

These are the syncookies generated over 10s from the different runs:

Without the patch:
TcpExtSyncookiesSent            3553749            0.0
TcpExtSyncookiesSent            3550895            0.0
TcpExtSyncookiesSent            3553845            0.0
TcpExtSyncookiesSent            3541050            0.0
TcpExtSyncookiesSent            3539921            0.0
TcpExtSyncookiesSent            3557659            0.0
TcpExtSyncookiesSent            3526812            0.0
TcpExtSyncookiesSent            3536121            0.0
TcpExtSyncookiesSent            3529963            0.0
TcpExtSyncookiesSent            3536319            0.0

With the patch:
TcpExtSyncookiesSent            3611786            0.0
TcpExtSyncookiesSent            3596682            0.0
TcpExtSyncookiesSent            3606878            0.0
TcpExtSyncookiesSent            3599564            0.0
TcpExtSyncookiesSent            3601304            0.0
TcpExtSyncookiesSent            3609249            0.0
TcpExtSyncookiesSent            3617437            0.0
TcpExtSyncookiesSent            3608765            0.0
TcpExtSyncookiesSent            3620205            0.0
TcpExtSyncookiesSent            3601895            0.0

Without the patch the average is 354263 pkt/s or 2822 ns/pkt and with
the patch the average is 360738 pkt/s or 2772 ns/pkt which gives an
estimate of 50 ns per packet.

[1] http://netsniff-ng.org/

Changelog since v1:
 - Change ordering in the ICW (Paolo Abeni)

Cc: Luigi Rizzo <lrizzo@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Brian Vazquez <brianvv@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-23 15:11:19 -07:00

512 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* net/ipv6/fib6_rules.c IPv6 Routing Policy Rules
*
* Copyright (C)2003-2006 Helsinki University of Technology
* Copyright (C)2003-2006 USAGI/WIDE Project
*
* Authors
* Thomas Graf <tgraf@suug.ch>
* Ville Nuorvala <vnuorval@tcs.hut.fi>
*/
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/netlink.h>
/* IPv6 policy routing rule: the generic fib_rule plus IPv6-only selectors. */
struct fib6_rule {
/* Generic rule state. Several functions in this file cast a
 * struct fib_rule * directly to struct fib6_rule *, which relies on this
 * member staying first.
 */
struct fib_rule common;
/* Source prefix selector; a plen of 0 means no source match is requested. */
struct rt6key src;
/* Destination prefix selector; a plen of 0 means no destination match. */
struct rt6key dst;
/* Traffic class selector, compared against ip6_tclass() of the flow label;
 * 0 means the selector is unset.
 */
u8 tclass;
};
/*
 * Return true when @rule has no IPv6-specific selector (no destination
 * prefix, no source prefix, no traffic class) and the generic
 * fib_rule_matchall() check also holds.
 */
static bool fib6_rule_matchall(const struct fib_rule *rule)
{
	struct fib6_rule *rule6 = container_of(rule, struct fib6_rule, common);
	bool has_v6_selector;

	has_v6_selector = rule6->dst.plen || rule6->src.plen || rule6->tclass;

	return !has_v6_selector && fib_rule_matchall(rule);
}
/*
 * Return true when @rule is a match-all, non-l3mdev FR_ACT_TO_TBL rule that
 * points at either RT6_TABLE_LOCAL or RT6_TABLE_MAIN.
 */
bool fib6_rule_default(const struct fib_rule *rule)
{
	if (!fib6_rule_matchall(rule))
		return false;

	if (rule->action != FR_ACT_TO_TBL || rule->l3mdev)
		return false;

	return rule->table == RT6_TABLE_LOCAL ||
	       rule->table == RT6_TABLE_MAIN;
}
EXPORT_SYMBOL_GPL(fib6_rule_default);
/* Thin wrapper: calls fib_rules_dump() for the AF_INET6 family with @net,
 * @nb and @extack, and returns its result unchanged.
 */
int fib6_rules_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return fib_rules_dump(net, nb, AF_INET6, extack);
}
/* Thin wrapper: returns fib_rules_seq_read() for @net and the AF_INET6
 * family.
 */
unsigned int fib6_rules_seq_read(struct net *net)
{
return fib_rules_seq_read(net, AF_INET6);
}
/*
 * Look up @fl6 and fill @res. Called with the RCU lock held; no reference
 * is taken on the fib6_info.
 *
 * With custom rules installed the lookup goes through fib_rules_lookup(),
 * with fib6_table_lookup as the per-table callback and &oif as its data.
 * Otherwise the local table is tried first, and the main table is used when
 * that lookup fails or yields the namespace's fib6_null_entry.
 *
 * Returns the error code of the last lookup performed.
 */
int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
		struct fib6_result *res, int flags)
{
	int err;

	if (net->ipv6.fib6_has_custom_rules) {
		struct fib_lookup_arg arg = {
			.lookup_ptr = fib6_table_lookup,
			.lookup_data = &oif,
			.result = res,
			.flags = FIB_LOOKUP_NOREF,
		};

		l3mdev_update_flow(net, flowi6_to_flowi(fl6));
		return fib_rules_lookup(net->ipv6.fib6_rules_ops,
					flowi6_to_flowi(fl6), flags, &arg);
	}

	err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif,
				fl6, res, flags);
	if (!err && res->f6i != net->ipv6.fib6_null_entry)
		return err;

	/* Local table had nothing usable: fall back to the main table. */
	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
				 oif, fl6, res, flags);
}
/*
 * Resolve @fl6 to a dst_entry using the per-table callback @lookup.
 *
 * Without custom rules the local table and then the main table are queried
 * directly through pol_lookup_func(). With custom rules the query goes
 * through fib_rules_lookup() and the route left in the fib6_result is
 * returned.
 *
 * When neither path yields a route, the namespace's ip6_null_entry is
 * returned, with a reference taken unless RT6_LOOKUP_F_DST_NOREF is set in
 * @flags.
 */
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb,
				   int flags, pol_lookup_t lookup)
{
	if (!net->ipv6.fib6_has_custom_rules) {
		struct rt6_info *rt;

		rt = pol_lookup_func(lookup, net, net->ipv6.fib6_local_tbl,
				     fl6, skb, flags);
		if (rt != net->ipv6.ip6_null_entry &&
		    rt->dst.error != -EAGAIN)
			return &rt->dst;
		ip6_rt_put_flags(rt, flags);

		rt = pol_lookup_func(lookup, net, net->ipv6.fib6_main_tbl,
				     fl6, skb, flags);
		if (rt->dst.error != -EAGAIN)
			return &rt->dst;
		ip6_rt_put_flags(rt, flags);
	} else {
		struct fib6_result res = {};
		struct fib_lookup_arg arg = {
			.lookup_ptr = lookup,
			.lookup_data = skb,
			.result = &res,
			.flags = FIB_LOOKUP_NOREF,
		};

		/* update flow if oif or iif point to device enslaved to l3mdev */
		l3mdev_update_flow(net, flowi6_to_flowi(fl6));

		/* The return value is not used; only res.rt6 decides. */
		fib_rules_lookup(net->ipv6.fib6_rules_ops,
				 flowi6_to_flowi(fl6), flags, &arg);

		if (res.rt6)
			return &res.rt6->dst;
	}

	/* Nothing found: hand back the null entry. */
	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
		dst_hold(&net->ipv6.ip6_null_entry->dst);

	return &net->ipv6.ip6_null_entry->dst;
}
static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
struct flowi6 *flp6, const struct net_device *dev)
{
struct fib6_rule *r = (struct fib6_rule *)rule;
/* If we need to find a source address for this traffic,
* we check the result if it meets requirement of the rule.
*/
if ((rule->flags & FIB_RULE_FIND_SADDR) &&
r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
struct in6_addr saddr;
if (ipv6_dev_get_saddr(net, dev, &flp6->daddr,
rt6_flags2srcprefs(flags), &saddr))
return -EAGAIN;
if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen))
return -EAGAIN;
flp6->saddr = saddr;
}
return 0;
}
/*
 * Rule action used when the lookup callback is fib6_table_lookup, i.e. the
 * result is a fib6_result rather than a dst. arg->lookup_data points at the
 * output interface index.
 *
 * Returns -ENETUNREACH / -EACCES / -EINVAL for the non-table actions, and
 * -EAGAIN when the rule's table is missing, the table lookup fails or
 * yields fib6_null_entry, or the deferred source-address check rejects the
 * result. Returns 0 otherwise.
 */
static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
				int flags, struct fib_lookup_arg *arg)
{
	struct fib6_result *res = arg->result;
	struct flowi6 *flp6 = &flp->u.ip6;
	struct net *net = rule->fr_net;
	struct fib6_table *table;
	int *oif;

	if (rule->action == FR_ACT_UNREACHABLE)
		return -ENETUNREACH;
	if (rule->action == FR_ACT_PROHIBIT)
		return -EACCES;
	/* FR_ACT_BLACKHOLE and any unknown action. */
	if (rule->action != FR_ACT_TO_TBL)
		return -EINVAL;

	table = fib6_get_table(net, fib_rule_get_table(rule, arg));
	if (!table)
		return -EAGAIN;

	oif = (int *)arg->lookup_data;
	if (fib6_table_lookup(net, table, *oif, flp6, res, flags))
		return -EAGAIN;
	if (res->f6i == net->ipv6.fib6_null_entry)
		return -EAGAIN;

	return fib6_rule_saddr(net, rule, flags, flp6, res->nh->fib_nh_dev);
}
/*
 * Rule action for lookups that return a dst (struct rt6_info); the table
 * callback is taken from arg->lookup_ptr and invoked via pol_lookup_func().
 *
 * Non-table actions resolve to one of the namespace's special entries
 * (null, blackhole or prohibit) together with a matching error code.
 * For FR_ACT_TO_TBL the rule's table is looked up and the result is checked
 * by fib6_rule_saddr().
 *
 * The chosen route (or NULL) is always stored in res->rt6 before returning.
 * Returns 0 or a negative errno. -EAGAIN is returned with res->rt6 == NULL;
 * presumably it tells the caller to go on to the next rule (confirm in
 * fib_rules_lookup()).
 */
static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
struct fib6_table *table;
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
int err = 0;
u32 tb_id;
/* Map the rule action to either "continue with a table lookup" or a
 * fixed special route plus error code.
 */
switch (rule->action) {
case FR_ACT_TO_TBL:
break;
case FR_ACT_UNREACHABLE:
err = -ENETUNREACH;
rt = net->ipv6.ip6_null_entry;
goto discard_pkt;
/* Unknown actions are treated like FR_ACT_BLACKHOLE. */
default:
case FR_ACT_BLACKHOLE:
err = -EINVAL;
rt = net->ipv6.ip6_blk_hole_entry;
goto discard_pkt;
case FR_ACT_PROHIBIT:
err = -EACCES;
rt = net->ipv6.ip6_prohibit_entry;
goto discard_pkt;
}
tb_id = fib_rule_get_table(rule, arg);
table = fib6_get_table(net, tb_id);
if (!table) {
/* rt is still NULL here, so res->rt6 ends up NULL. */
err = -EAGAIN;
goto out;
}
rt = pol_lookup_func(lookup,
net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
/* Deferred source-address check against the route's device. */
err = fib6_rule_saddr(net, rule, flags, flp6,
ip6_dst_idev(&rt->dst)->dev);
if (err == -EAGAIN)
goto again;
/* Accept the route unless it carries -EAGAIN itself. */
err = rt->dst.error;
if (err != -EAGAIN)
goto out;
}
/* Reached for a null-entry result, a failed source-address check, or a
 * route whose dst.error is -EAGAIN: drop the lookup result and report
 * -EAGAIN with no route.
 */
again:
ip6_rt_put_flags(rt, flags);
err = -EAGAIN;
rt = NULL;
goto out;
/* Special entries: take a reference unless the caller asked for a
 * reference-less dst via RT6_LOOKUP_F_DST_NOREF.
 */
discard_pkt:
if (!(flags & RT6_LOOKUP_F_DST_NOREF))
dst_hold(&rt->dst);
out:
res->rt6 = rt;
return err;
}
/*
 * .action callback: dispatch on the lookup callback stored in @arg.
 * Lookups driven by fib6_table_lookup produce a fib6_result and are handled
 * by fib6_rule_action_alt(); every other callback goes through
 * __fib6_rule_action().
 */
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	if (arg->lookup_ptr != fib6_table_lookup)
		return __fib6_rule_action(rule, flp, flags, arg);

	return fib6_rule_action_alt(rule, flp, flags, arg);
}
/*
 * .suppress callback: decide whether the route found for @rule must be
 * discarded.
 *
 * A route is suppressed when its destination prefix length is not longer
 * than rule->suppress_prefixlen, or when its device belongs to the
 * interface group in rule->suppress_ifgroup (-1 disables the group check).
 * Returns false when there is no route in the result.
 *
 * NOTE(review): the release below is keyed off arg->flags, while
 * fib6_rule_lookup() in this file always sets FIB_LOOKUP_NOREF there,
 * independent of RT6_LOOKUP_F_DST_NOREF. Confirm that a suppressed route
 * which does hold a reference cannot be leaked.
 */
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
	struct fib6_result *res = arg->result;
	struct rt6_info *rt = res->rt6;
	struct net_device *dev = NULL;
	bool suppress;

	if (!rt)
		return false;

	if (rt->rt6i_idev)
		dev = rt->rt6i_idev->dev;

	/* Prefix too short, or device in the forbidden interface group. */
	suppress = rt->rt6i_dst.plen <= rule->suppress_prefixlen ||
		   (rule->suppress_ifgroup != -1 && dev &&
		    dev->group == rule->suppress_ifgroup);
	if (!suppress)
		return false;

	if (!(arg->flags & FIB_LOOKUP_NOREF))
		ip6_rt_put(rt);

	return true;
}
/*
 * .match callback: return 1 when flow @fl satisfies every selector of
 * @rule, 0 otherwise.
 *
 * Selectors checked, in order: destination prefix, source prefix, traffic
 * class, IP protocol, source port range, destination port range.
 */
static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	struct fib6_rule *rule6 = (struct fib6_rule *)rule;
	struct flowi6 *fl6 = &fl->u.ip6;

	if (rule6->dst.plen &&
	    !ipv6_prefix_equal(&fl6->daddr, &rule6->dst.addr,
			       rule6->dst.plen))
		return 0;

	if (rule6->src.plen) {
		if (!(flags & RT6_LOOKUP_F_HAS_SADDR)) {
			/*
			 * No source address yet: only rules with
			 * FIB_RULE_FIND_SADDR may still match, and their
			 * source check is deferred.
			 */
			if (!(rule6->common.flags & FIB_RULE_FIND_SADDR))
				return 0;
		} else if (!ipv6_prefix_equal(&fl6->saddr, &rule6->src.addr,
					      rule6->src.plen)) {
			return 0;
		}
	}

	if (rule6->tclass && rule6->tclass != ip6_tclass(fl6->flowlabel))
		return 0;

	if (rule->ip_proto && rule->ip_proto != fl6->flowi6_proto)
		return 0;

	if (fib_rule_port_range_set(&rule->sport_range) &&
	    !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
		return 0;

	if (fib_rule_port_range_set(&rule->dport_range) &&
	    !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
		return 0;

	return 1;
}
/* Netlink attribute policy for IPv6 rules: only the entries supplied by
 * FRA_GENERIC_POLICY. Installed as .policy in fib6_rules_ops_template.
 */
static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
};
/*
 * .configure callback: fill in the IPv6-specific part of a new rule from
 * the netlink request (@frh, @tb).
 *
 * For non-l3mdev FR_ACT_TO_TBL rules the target table must not be
 * RT6_TABLE_UNSPEC and is created on demand through fib6_new_table().
 * On success the namespace is flagged as having custom rules, and the
 * flow-dissector requirement counter is bumped when the rule needs it.
 *
 * Returns 0 on success, -EINVAL for an invalid table (with an extack
 * message), or -ENOBUFS when the table cannot be obtained.
 */
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh,
			       struct nlattr **tb,
			       struct netlink_ext_ack *extack)
{
	struct fib6_rule *rule6 = (struct fib6_rule *)rule;
	struct net *net = sock_net(skb->sk);

	if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
		if (rule->table == RT6_TABLE_UNSPEC) {
			NL_SET_ERR_MSG(extack, "Invalid table");
			return -EINVAL;
		}

		if (!fib6_new_table(net, rule->table))
			return -ENOBUFS;
	}

	/* Addresses are only read when a prefix length was supplied. */
	if (frh->src_len)
		rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]);
	if (frh->dst_len)
		rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]);

	rule6->src.plen = frh->src_len;
	rule6->dst.plen = frh->dst_len;
	rule6->tclass = frh->tos;

	if (fib_rule_requires_fldissect(rule))
		net->ipv6.fib6_rules_require_fldissect++;

	net->ipv6.fib6_has_custom_rules = true;

	return 0;
}
/*
 * .delete callback: undo the flow-dissector accounting done at configure
 * time. The counter is only decremented while it is non-zero and the rule
 * requires flow dissection. Always returns 0.
 */
static int fib6_rule_delete(struct fib_rule *rule)
{
	struct net *net = rule->fr_net;

	if (!net->ipv6.fib6_rules_require_fldissect)
		return 0;

	if (fib_rule_requires_fldissect(rule))
		net->ipv6.fib6_rules_require_fldissect--;

	return 0;
}
/*
 * .compare callback: return 1 when the IPv6-specific selectors given in
 * the request (@frh, @tb) match those stored in @rule, 0 otherwise.
 * A selector that is zero in the request is not compared.
 */
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	struct fib6_rule *rule6 = (struct fib6_rule *)rule;

	if (frh->src_len) {
		if (rule6->src.plen != frh->src_len)
			return 0;
		if (nla_memcmp(tb[FRA_SRC], &rule6->src.addr,
			       sizeof(struct in6_addr)))
			return 0;
	}

	if (frh->dst_len) {
		if (rule6->dst.plen != frh->dst_len)
			return 0;
		if (nla_memcmp(tb[FRA_DST], &rule6->dst.addr,
			       sizeof(struct in6_addr)))
			return 0;
	}

	if (frh->tos && rule6->tclass != frh->tos)
		return 0;

	return 1;
}
/*
 * .fill callback: export the IPv6-specific selectors of @rule into the
 * netlink header @frh and message @skb. FRA_DST / FRA_SRC attributes are
 * only emitted for non-zero prefix lengths.
 *
 * Returns 0 on success, -ENOBUFS when an attribute cannot be added.
 */
static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	struct fib6_rule *rule6 = (struct fib6_rule *)rule;

	frh->dst_len = rule6->dst.plen;
	frh->src_len = rule6->src.plen;
	frh->tos = rule6->tclass;

	if (rule6->dst.plen &&
	    nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr))
		return -ENOBUFS;

	if (rule6->src.plen &&
	    nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))
		return -ENOBUFS;

	return 0;
}
/*
 * .nlmsg_payload callback: space needed for the IPv6-specific attributes
 * of a rule message, i.e. one 16-byte destination and one 16-byte source
 * address attribute. @rule is not consulted.
 */
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
	int dst_attr = nla_total_size(16);
	int src_attr = nla_total_size(16);

	return dst_attr + src_attr;
}
/* Template of the AF_INET6 rule operations; fib6_rules_net_init() passes it
 * to fib_rules_register() for each network namespace.
 */
static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.family = AF_INET6,
/* Rules are allocated as struct fib6_rule, addresses are in6_addr sized. */
.rule_size = sizeof(struct fib6_rule),
.addr_size = sizeof(struct in6_addr),
/* Callbacks implemented in this file. */
.action = fib6_rule_action,
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
.delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
.owner = THIS_MODULE,
.fro_net = &init_net,
};
/*
 * Per-namespace init: register the IPv6 rule ops and install the two
 * default rules (preference 0 -> RT6_TABLE_LOCAL, preference 0x7FFE ->
 * RT6_TABLE_MAIN).
 *
 * Returns 0 on success. If registration fails its error is returned; if
 * adding a default rule fails the ops are unregistered again and that
 * error is returned.
 */
static int __net_init fib6_rules_net_init(struct net *net)
{
	struct fib_rules_ops *ops;
	int err;

	ops = fib_rules_register(&fib6_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0);
	if (!err)
		err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0);
	if (err) {
		fib_rules_unregister(ops);
		return err;
	}

	net->ipv6.fib6_rules_ops = ops;
	net->ipv6.fib6_rules_require_fldissect = 0;

	return 0;
}
/* Per-namespace teardown: unregister the namespace's IPv6 rule ops, with
 * the call bracketed by rtnl_lock()/rtnl_unlock().
 */
static void __net_exit fib6_rules_net_exit(struct net *net)
{
rtnl_lock();
fib_rules_unregister(net->ipv6.fib6_rules_ops);
rtnl_unlock();
}
/* Per-namespace hooks, registered by fib6_rules_init() and removed by
 * fib6_rules_cleanup().
 */
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
.exit = fib6_rules_net_exit,
};
/* Register the per-namespace init/exit hooks for IPv6 policy rules.
 * Returns the result of register_pernet_subsys().
 */
int __init fib6_rules_init(void)
{
return register_pernet_subsys(&fib6_rules_net_ops);
}
/* Unregister the per-namespace hooks installed by fib6_rules_init(). */
void fib6_rules_cleanup(void)
{
unregister_pernet_subsys(&fib6_rules_net_ops);
}