linux_dsm_epyc7002/net/ipv6/ip6_input.c
Edward Cree 22f6bbb7bc net: use skb_list_del_init() to remove from RX sublists
list_del() leaves the skb->next pointer poisoned, which can then lead to
 a crash in e.g. OVS forwarding.  For example, setting up an OVS VXLAN
 forwarding bridge on sfc as per:

========
$ ovs-vsctl show
5dfd9c47-f04b-4aaa-aa96-4fbb0a522a30
    Bridge "br0"
        Port "br0"
            Interface "br0"
                type: internal
        Port "enp6s0f0"
            Interface "enp6s0f0"
        Port "vxlan0"
            Interface "vxlan0"
                type: vxlan
                options: {key="1", local_ip="10.0.0.5", remote_ip="10.0.0.4"}
    ovs_version: "2.5.0"
========
(where 10.0.0.5 is an address on enp6s0f1)
and sending traffic across it will lead to the following panic:
========
general protection fault: 0000 [#1] SMP PTI
CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.20.0-rc3-ehc+ #701
Hardware name: Dell Inc. PowerEdge R710/0M233H, BIOS 6.4.0 07/23/2013
RIP: 0010:dev_hard_start_xmit+0x38/0x200
Code: 53 48 89 fb 48 83 ec 20 48 85 ff 48 89 54 24 08 48 89 4c 24 18 0f 84 ab 01 00 00 48 8d 86 90 00 00 00 48 89 f5 48 89 44 24 10 <4c> 8b 33 48 c7 03 00 00 00 00 48 8b 05 c7 d1 b3 00 4d 85 f6 0f 95
RSP: 0018:ffff888627b437e0 EFLAGS: 00010202
RAX: 0000000000000000 RBX: dead000000000100 RCX: ffff88862279c000
RDX: ffff888614a342c0 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff888618a88000 R08: 0000000000000001 R09: 00000000000003e8
R10: 0000000000000000 R11: ffff888614a34140 R12: 0000000000000000
R13: 0000000000000062 R14: dead000000000100 R15: ffff888616430000
FS:  0000000000000000(0000) GS:ffff888627b40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f6d2bc6d000 CR3: 000000000200a000 CR4: 00000000000006e0
Call Trace:
 <IRQ>
 __dev_queue_xmit+0x623/0x870
 ? masked_flow_lookup+0xf7/0x220 [openvswitch]
 ? ep_poll_callback+0x101/0x310
 do_execute_actions+0xaba/0xaf0 [openvswitch]
 ? __wake_up_common+0x8a/0x150
 ? __wake_up_common_lock+0x87/0xc0
 ? queue_userspace_packet+0x31c/0x5b0 [openvswitch]
 ovs_execute_actions+0x47/0x120 [openvswitch]
 ovs_dp_process_packet+0x7d/0x110 [openvswitch]
 ovs_vport_receive+0x6e/0xd0 [openvswitch]
 ? dst_alloc+0x64/0x90
 ? rt_dst_alloc+0x50/0xd0
 ? ip_route_input_slow+0x19a/0x9a0
 ? __udp_enqueue_schedule_skb+0x198/0x1b0
 ? __udp4_lib_rcv+0x856/0xa30
 ? __udp4_lib_rcv+0x856/0xa30
 ? cpumask_next_and+0x19/0x20
 ? find_busiest_group+0x12d/0xcd0
 netdev_frame_hook+0xce/0x150 [openvswitch]
 __netif_receive_skb_core+0x205/0xae0
 __netif_receive_skb_list_core+0x11e/0x220
 netif_receive_skb_list+0x203/0x460
 ? __efx_rx_packet+0x335/0x5e0 [sfc]
 efx_poll+0x182/0x320 [sfc]
 net_rx_action+0x294/0x3c0
 __do_softirq+0xca/0x297
 irq_exit+0xa6/0xb0
 do_IRQ+0x54/0xd0
 common_interrupt+0xf/0xf
 </IRQ>
========
So, in all listified-receive handling, instead pull skbs off the lists with
 skb_list_del_init().

Fixes: 9af86f9338 ("net: core: fix use-after-free in __netif_receive_skb_list_core")
Fixes: 7da517a3bc ("net: core: Another step of skb receive list processing")
Fixes: a4ca8b7df7 ("net: ipv4: fix drop handling in ip_list_rcv() and ip_list_rcv_finish()")
Fixes: d8269e2cbf ("net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-12-05 16:22:05 -08:00

512 lines
13 KiB
C

/*
* IPv6 input
* Linux INET6 implementation
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
* Ian P. Morris <I.P.Morris@soton.ac.uk>
*
* Based in linux/net/ipv4/ip_input.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* Changes
*
* Mitsuru KANDA @USAGI and
* YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/icmpv6.h>
#include <linux/mroute6.h>
#include <linux/slab.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
void (*edemux)(struct sk_buff *skb);
if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
edemux(skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
skb = l3mdev_ip6_rcv(skb);
if (!skb)
return NET_RX_SUCCESS;
ip6_rcv_finish_core(net, sk, skb);
return dst_input(skb);
}
static void ip6_sublist_rcv_finish(struct list_head *head)
{
struct sk_buff *skb, *next;
list_for_each_entry_safe(skb, next, head, list)
dst_input(skb);
}
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
struct dst_entry *curr_dst = NULL;
struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
skb = l3mdev_ip6_rcv(skb);
if (!skb)
continue;
ip6_rcv_finish_core(net, sk, skb);
dst = skb_dst(skb);
if (curr_dst != dst) {
/* dispatch old sublist */
if (!list_empty(&sublist))
ip6_sublist_rcv_finish(&sublist);
/* start new sublist */
INIT_LIST_HEAD(&sublist);
curr_dst = dst;
}
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
ip6_sublist_rcv_finish(&sublist);
}
static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
struct net *net)
{
const struct ipv6hdr *hdr;
u32 pkt_len;
struct inet6_dev *idev;
if (skb->pkt_type == PACKET_OTHERHOST) {
kfree_skb(skb);
return NULL;
}
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
__IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
/*
* Store incoming device index. When the packet will
* be queued, we cannot refer to skb->dev anymore.
*
* BTW, when we send a packet for our own local address on a
* non-loopback interface (e.g. ethX), it is being delivered
* via the loopback interface (lo) here; skb->dev = loopback_dev.
* It, however, should be considered as if it is being
* arrived via the sending interface (ethX), because of the
* nature of scoping architecture. --yoshfuji
*/
IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
hdr = ipv6_hdr(skb);
if (hdr->version != 6)
goto err;
__IP6_ADD_STATS(net, idev,
IPSTATS_MIB_NOECTPKTS +
(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* The loopback address must not be used as the source address in IPv6
* packets that are sent outside of a single node. [..]
* A packet received on an interface with a destination address
* of loopback must be dropped.
*/
if ((ipv6_addr_loopback(&hdr->saddr) ||
ipv6_addr_loopback(&hdr->daddr)) &&
!(dev->flags & IFF_LOOPBACK) &&
!netif_is_l3_master(dev))
goto err;
/* RFC4291 Errata ID: 3480
* Interface-Local scope spans only a single interface on a
* node and is useful only for loopback transmission of
* multicast. Packets with interface-local scope received
* from another node must be discarded.
*/
if (!(skb->pkt_type == PACKET_LOOPBACK ||
dev->flags & IFF_LOOPBACK) &&
ipv6_addr_is_multicast(&hdr->daddr) &&
IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
goto err;
/* If enabled, drop unicast packets that were encapsulated in link-layer
* multicast or broadcast to protected against the so-called "hole-196"
* attack in 802.11 wireless.
*/
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
idev->cnf.drop_unicast_in_l2_multicast)
goto err;
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
* field contains the reserved value 0; if such a packet is received, it
* must be silently dropped.
*/
if (ipv6_addr_is_multicast(&hdr->daddr) &&
IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0)
goto err;
/*
* RFC4291 2.7
* Multicast addresses must not be used as source addresses in IPv6
* packets or appear in any Routing header.
*/
if (ipv6_addr_is_multicast(&hdr->saddr))
goto err;
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
pkt_len = ntohs(hdr->payload_len);
/* pkt_len may be zero if Jumbo payload option is present */
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
__IP6_INC_STATS(net,
idev, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
hdr = ipv6_hdr(skb);
}
if (hdr->nexthdr == NEXTHDR_HOP) {
if (ipv6_parse_hopopts(skb) < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
rcu_read_unlock();
return NULL;
}
}
rcu_read_unlock();
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
return skb;
err:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
drop:
rcu_read_unlock();
kfree_skb(skb);
return NULL;
}
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct net *net = dev_net(skb->dev);
skb = ip6_rcv_core(skb, dev, net);
if (skb == NULL)
return NET_RX_DROP;
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
net, NULL, skb, dev, NULL,
ip6_rcv_finish);
}
static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev,
struct net *net)
{
NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL,
head, dev, NULL, ip6_rcv_finish);
ip6_list_rcv_finish(net, NULL, head);
}
/* Receive a list of IPv6 packets */
void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *orig_dev)
{
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
skb_list_del_init(skb);
skb = ip6_rcv_core(skb, dev, net);
if (skb == NULL)
continue;
if (curr_dev != dev || curr_net != net) {
/* dispatch old sublist */
if (!list_empty(&sublist))
ip6_sublist_rcv(&sublist, curr_dev, curr_net);
/* start new sublist */
INIT_LIST_HEAD(&sublist);
curr_dev = dev;
curr_net = net;
}
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
/*
* Deliver the packet to the host
*/
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
int nexthdr;
bool raw;
bool have_final = false;
/*
* Parse extension headers
*/
rcu_read_lock();
resubmit:
idev = ip6_dst_idev(skb_dst(skb));
if (!pskb_pull(skb, skb_transport_offset(skb)))
goto discard;
nhoff = IP6CB(skb)->nhoff;
nexthdr = skb_network_header(skb)[nhoff];
resubmit_final:
raw = raw6_local_deliver(skb, nexthdr);
ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot) {
int ret;
if (have_final) {
if (!(ipprot->flags & INET6_PROTO_FINAL)) {
/* Once we've seen a final protocol don't
* allow encapsulation on any non-final
* ones. This allows foo in UDP encapsulation
* to work.
*/
goto discard;
}
} else if (ipprot->flags & INET6_PROTO_FINAL) {
const struct ipv6hdr *hdr;
/* Only do this once for first final protocol */
have_final = true;
/* Free reference early: we don't need it any more,
and it may hold ip_conntrack module loaded
indefinitely. */
nf_reset(skb);
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
hdr = ipv6_hdr(skb);
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
&hdr->saddr) &&
!ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
goto discard;
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
ret = ipprot->handler(skb);
if (ret > 0) {
if (ipprot->flags & INET6_PROTO_FINAL) {
/* Not an extension header, most likely UDP
* encapsulation. Use return value as nexthdr
* protocol not nhoff (which presumably is
* not set by handler).
*/
nexthdr = ret;
goto resubmit_final;
} else {
goto resubmit;
}
} else if (ret == 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
}
} else {
if (!raw) {
if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
__IP6_INC_STATS(net, idev,
IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
}
kfree_skb(skb);
} else {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
}
}
rcu_read_unlock();
return 0;
discard:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
kfree_skb(skb);
return 0;
}
int ip6_input(struct sk_buff *skb)
{
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_input_finish);
}
EXPORT_SYMBOL_GPL(ip6_input);
int ip6_mc_input(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
bool deliver;
__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
__in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
skb->len);
hdr = ipv6_hdr(skb);
deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
#ifdef CONFIG_IPV6_MROUTE
/*
* IPv6 multicast router mode is now supported ;)
*/
if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
/*
* Okay, we try to forward - split and duplicate
* packets.
*/
struct sk_buff *skb2;
struct inet6_skb_parm *opt = IP6CB(skb);
/* Check for MLD */
if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
/* Check if this is a mld message */
u8 nexthdr = hdr->nexthdr;
__be16 frag_off;
int offset;
/* Check if the value of Router Alert
* is for MLD (0x0000).
*/
if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
deliver = false;
if (!ipv6_ext_hdr(nexthdr)) {
/* BUG */
goto out;
}
offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
&nexthdr, &frag_off);
if (offset < 0)
goto out;
if (ipv6_is_mld(skb, nexthdr, offset))
deliver = true;
goto out;
}
/* unknown RA - process it normally */
}
if (deliver)
skb2 = skb_clone(skb, GFP_ATOMIC);
else {
skb2 = skb;
skb = NULL;
}
if (skb2) {
ip6_mr_input(skb2);
}
}
out:
#endif
if (likely(deliver))
ip6_input(skb);
else {
/* discard */
kfree_skb(skb);
}
return 0;
}