linux_dsm_epyc7002/net/hsr/hsr_framereg.c
Taehee Yoo 92a35678ec hsr: fix a race condition in node list insertion and deletion
hsr nodes are protected by RCU and there is no write side lock.
But node insertions and deletions could be being operated concurrently.
So write side locking is needed.

Test commands:
    ip netns add nst
    ip link add veth0 type veth peer name veth1
    ip link add veth2 type veth peer name veth3
    ip link set veth1 netns nst
    ip link set veth3 netns nst
    ip link set veth0 up
    ip link set veth2 up
    ip link add hsr0 type hsr slave1 veth0 slave2 veth2
    ip a a 192.168.100.1/24 dev hsr0
    ip link set hsr0 up
    ip netns exec nst ip link set veth1 up
    ip netns exec nst ip link set veth3 up
    ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3
    ip netns exec nst ip a a 192.168.100.2/24 dev hsr1
    ip netns exec nst ip link set hsr1 up

    for i in {0..9}
    do
        for j in {0..9}
	do
	    for k in {0..9}
	    do
	        for l in {0..9}
		do
	        arping 192.168.100.2 -I hsr0 -s 00:01:3$i:4$j:5$k:6$l -c1 &
		done
	    done
	done
    done

Splat looks like:
[  236.066091][ T3286] list_add corruption. next->prev should be prev (ffff8880a5940300), but was ffff8880a5940d0.
[  236.069617][ T3286] ------------[ cut here ]------------
[  236.070545][ T3286] kernel BUG at lib/list_debug.c:25!
[  236.071391][ T3286] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  236.072343][ T3286] CPU: 0 PID: 3286 Comm: arping Tainted: G        W         5.5.0-rc1+ #209
[  236.073463][ T3286] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  236.074695][ T3286] RIP: 0010:__list_add_valid+0x74/0xd0
[  236.075499][ T3286] Code: 48 39 da 75 27 48 39 f5 74 36 48 39 dd 74 31 48 83 c4 08 b8 01 00 00 00 5b 5d c3 48 b
[  236.078277][ T3286] RSP: 0018:ffff8880aaa97648 EFLAGS: 00010286
[  236.086991][ T3286] RAX: 0000000000000075 RBX: ffff8880d4624c20 RCX: 0000000000000000
[  236.088000][ T3286] RDX: 0000000000000075 RSI: 0000000000000008 RDI: ffffed1015552ebf
[  236.098897][ T3286] RBP: ffff88809b53d200 R08: ffffed101b3c04f9 R09: ffffed101b3c04f9
[  236.099960][ T3286] R10: 00000000308769a1 R11: ffffed101b3c04f8 R12: ffff8880d4624c28
[  236.100974][ T3286] R13: ffff8880d4624c20 R14: 0000000040310100 R15: ffff8880ce17ee02
[  236.138967][ T3286] FS:  00007f23479fa680(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000
[  236.144852][ T3286] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  236.145720][ T3286] CR2: 00007f4a14bab210 CR3: 00000000a61c6001 CR4: 00000000000606f0
[  236.146776][ T3286] Call Trace:
[  236.147222][ T3286]  hsr_add_node+0x314/0x490 [hsr]
[  236.153633][ T3286]  hsr_forward_skb+0x2b6/0x1bc0 [hsr]
[  236.154362][ T3286]  ? rcu_read_lock_sched_held+0x90/0xc0
[  236.155091][ T3286]  ? rcu_read_lock_bh_held+0xa0/0xa0
[  236.156607][ T3286]  hsr_dev_xmit+0x70/0xd0 [hsr]
[  236.157254][ T3286]  dev_hard_start_xmit+0x160/0x740
[  236.157941][ T3286]  __dev_queue_xmit+0x1961/0x2e10
[  236.158565][ T3286]  ? netdev_core_pick_tx+0x2e0/0x2e0
[ ... ]

Reported-by: syzbot+3924327f9ad5f4d2b343@syzkaller.appspotmail.com
Fixes: f421436a59 ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-25 16:35:35 -08:00

529 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright 2011-2014 Autronica Fire and Security AS
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*
* The HSR spec says never to forward the same frame twice on the same
* interface. A frame is identified by its source MAC address and its HSR
* sequence number. This code keeps track of senders and their sequence numbers
* to allow filtering of duplicate frames, and to detect HSR ring errors.
*/
#include <linux/if_ether.h>
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
#include "hsr_netlink.h"
/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
* false otherwise.
*/
static bool seq_nr_after(u16 a, u16 b)
{
/* Remove inconsistency where
* seq_nr_after(a, b) == seq_nr_before(a, b)
*/
if ((int)b - a == 32768)
return false;
return (((s16)(b - a)) < 0);
}
#define seq_nr_before(a, b) seq_nr_after((b), (a))
#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b)))
#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
struct hsr_node *node;
node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node,
mac_list);
if (!node) {
WARN_ONCE(1, "HSR: No self node\n");
return false;
}
if (ether_addr_equal(addr, node->macaddress_A))
return true;
if (ether_addr_equal(addr, node->macaddress_B))
return true;
return false;
}
/* Search for mac entry. Caller must hold rcu read lock.
*/
static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
const unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
list_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, addr))
return node;
}
return NULL;
}
/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
* frames from self that's been looped over the HSR ring.
*/
int hsr_create_self_node(struct hsr_priv *hsr,
unsigned char addr_a[ETH_ALEN],
unsigned char addr_b[ETH_ALEN])
{
struct list_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node, *oldnode;
node = kmalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;
ether_addr_copy(node->macaddress_A, addr_a);
ether_addr_copy(node->macaddress_B, addr_b);
spin_lock_bh(&hsr->list_lock);
oldnode = list_first_or_null_rcu(self_node_db,
struct hsr_node, mac_list);
if (oldnode) {
list_replace_rcu(&oldnode->mac_list, &node->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(oldnode, rcu_head);
} else {
list_add_tail_rcu(&node->mac_list, self_node_db);
spin_unlock_bh(&hsr->list_lock);
}
return 0;
}
void hsr_del_self_node(struct hsr_priv *hsr)
{
struct list_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node;
spin_lock_bh(&hsr->list_lock);
node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
if (node) {
list_del_rcu(&node->mac_list);
kfree_rcu(node, rcu_head);
}
spin_unlock_bh(&hsr->list_lock);
}
void hsr_del_nodes(struct list_head *node_db)
{
struct hsr_node *node;
struct hsr_node *tmp;
list_for_each_entry_safe(node, tmp, node_db, mac_list)
kfree(node);
}
/* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A;
* seq_out is used to initialize filtering of outgoing duplicate frames
* originating from the newly added node.
*/
static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
struct list_head *node_db,
unsigned char addr[],
u16 seq_out)
{
struct hsr_node *new_node, *node;
unsigned long now;
int i;
new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
if (!new_node)
return NULL;
ether_addr_copy(new_node->macaddress_A, addr);
/* We are only interested in time diffs here, so use current jiffies
* as initialization. (0 could trigger an spurious ring error warning).
*/
now = jiffies;
for (i = 0; i < HSR_PT_PORTS; i++)
new_node->time_in[i] = now;
for (i = 0; i < HSR_PT_PORTS; i++)
new_node->seq_out[i] = seq_out;
spin_lock_bh(&hsr->list_lock);
list_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
goto out;
}
list_add_tail_rcu(&new_node->mac_list, node_db);
spin_unlock_bh(&hsr->list_lock);
return new_node;
out:
spin_unlock_bh(&hsr->list_lock);
kfree(new_node);
return node;
}
/* Get the hsr_node from which 'skb' was sent.
*/
struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
bool is_sup)
{
struct list_head *node_db = &port->hsr->node_db;
struct hsr_priv *hsr = port->hsr;
struct hsr_node *node;
struct ethhdr *ethhdr;
u16 seq_out;
if (!skb_mac_header_was_set(skb))
return NULL;
ethhdr = (struct ethhdr *)skb_mac_header(skb);
list_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, ethhdr->h_source))
return node;
if (ether_addr_equal(node->macaddress_B, ethhdr->h_source))
return node;
}
/* Everyone may create a node entry, connected node to a HSR device. */
if (ethhdr->h_proto == htons(ETH_P_PRP) ||
ethhdr->h_proto == htons(ETH_P_HSR)) {
/* Use the existing sequence_nr from the tag as starting point
* for filtering duplicate frames.
*/
seq_out = hsr_get_skb_sequence_nr(skb) - 1;
} else {
/* this is called also for frames from master port and
* so warn only for non master ports
*/
if (port->type != HSR_PT_MASTER)
WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
seq_out = HSR_SEQNR_START;
}
return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out);
}
/* Use the Supervision frame's info about an eventual macaddress_B for merging
* nodes that has previously had their macaddress_B registered as a separate
* node.
*/
void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
struct hsr_port *port_rcv)
{
struct hsr_priv *hsr = port_rcv->hsr;
struct hsr_sup_payload *hsr_sp;
struct hsr_node *node_real;
struct list_head *node_db;
struct ethhdr *ethhdr;
int i;
ethhdr = (struct ethhdr *)skb_mac_header(skb);
/* Leave the ethernet header. */
skb_pull(skb, sizeof(struct ethhdr));
/* And leave the HSR tag. */
if (ethhdr->h_proto == htons(ETH_P_HSR))
skb_pull(skb, sizeof(struct hsr_tag));
/* And leave the HSR sup tag. */
skb_pull(skb, sizeof(struct hsr_sup_tag));
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Merge node_curr (registered on macaddress_B) into node_real */
node_db = &port_rcv->hsr->node_db;
node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
if (!node_real)
/* No frame received from AddrA of this node yet */
node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
HSR_SEQNR_START - 1);
if (!node_real)
goto done; /* No mem */
if (node_real == node_curr)
/* Node has already been merged */
goto done;
ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
for (i = 0; i < HSR_PT_PORTS; i++) {
if (!node_curr->time_in_stale[i] &&
time_after(node_curr->time_in[i], node_real->time_in[i])) {
node_real->time_in[i] = node_curr->time_in[i];
node_real->time_in_stale[i] =
node_curr->time_in_stale[i];
}
if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i]))
node_real->seq_out[i] = node_curr->seq_out[i];
}
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
list_del_rcu(&node_curr->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(node_curr, rcu_head);
done:
skb_push(skb, sizeof(struct hsrv1_ethhdr_sp));
}
/* 'skb' is a frame meant for this host, that is to be passed to upper layers.
*
* If the frame was sent by a node's B interface, replace the source
* address with that node's "official" address (macaddress_A) so that upper
* layers recognize where it came from.
*/
void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb)
{
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
return;
}
memcpy(&eth_hdr(skb)->h_source, node->macaddress_A, ETH_ALEN);
}
/* 'skb' is a frame meant for another host.
* 'port' is the outgoing interface
*
* Substitute the target (dest) MAC address if necessary, so the it matches the
* recipient interface MAC address, regardless of whether that is the
* recipient's A or B interface.
* This is needed to keep the packets flowing through switches that learn on
* which "side" the different interfaces are.
*/
void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
struct hsr_port *port)
{
struct hsr_node *node_dst;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
return;
}
if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
return;
node_dst = find_node_by_addr_A(&port->hsr->node_db,
eth_hdr(skb)->h_dest);
if (!node_dst) {
WARN_ONCE(1, "%s: Unknown node\n", __func__);
return;
}
if (port->type != node_dst->addr_B_port)
return;
ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B);
}
void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
u16 sequence_nr)
{
/* Don't register incoming frames without a valid sequence number. This
* ensures entries of restarted nodes gets pruned so that they can
* re-register and resume communications.
*/
if (seq_nr_before(sequence_nr, node->seq_out[port->type]))
return;
node->time_in[port->type] = jiffies;
node->time_in_stale[port->type] = false;
}
/* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid
* ethhdr->h_source address and skb->mac_header set.
*
* Return:
* 1 if frame can be shown to have been sent recently on this interface,
* 0 otherwise, or
* negative error code on error
*/
int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr)
{
if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]))
return 1;
node->seq_out[port->type] = sequence_nr;
return 0;
}
static struct hsr_port *get_late_port(struct hsr_priv *hsr,
struct hsr_node *node)
{
if (node->time_in_stale[HSR_PT_SLAVE_A])
return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
if (node->time_in_stale[HSR_PT_SLAVE_B])
return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
if (time_after(node->time_in[HSR_PT_SLAVE_B],
node->time_in[HSR_PT_SLAVE_A] +
msecs_to_jiffies(MAX_SLAVE_DIFF)))
return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
if (time_after(node->time_in[HSR_PT_SLAVE_A],
node->time_in[HSR_PT_SLAVE_B] +
msecs_to_jiffies(MAX_SLAVE_DIFF)))
return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
return NULL;
}
/* Remove stale sequence_nr records. Called by timer every
* HSR_LIFE_CHECK_INTERVAL (two seconds or so).
*/
void hsr_prune_nodes(struct timer_list *t)
{
struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
struct hsr_node *node;
struct hsr_node *tmp;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
spin_lock_bh(&hsr->list_lock);
list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
/* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
* nor time_in[HSR_PT_SLAVE_B], will ever be updated for
* the master port. Thus the master node will be repeatedly
* pruned leading to packet loss.
*/
if (hsr_addr_is_self(hsr, node->macaddress_A))
continue;
/* Shorthand */
time_a = node->time_in[HSR_PT_SLAVE_A];
time_b = node->time_in[HSR_PT_SLAVE_B];
/* Check for timestamps old enough to risk wrap-around */
if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
node->time_in_stale[HSR_PT_SLAVE_A] = true;
if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
node->time_in_stale[HSR_PT_SLAVE_B] = true;
/* Get age of newest frame from node.
* At least one time_in is OK here; nodes get pruned long
* before both time_ins can get stale
*/
timestamp = time_a;
if (node->time_in_stale[HSR_PT_SLAVE_A] ||
(!node->time_in_stale[HSR_PT_SLAVE_B] &&
time_after(time_b, time_a)))
timestamp = time_b;
/* Warn of ring error only as long as we get frames at all */
if (time_is_after_jiffies(timestamp +
msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
rcu_read_lock();
port = get_late_port(hsr, node);
if (port)
hsr_nl_ringerror(hsr, node->macaddress_A, port);
rcu_read_unlock();
}
/* Prune old entries */
if (time_is_before_jiffies(timestamp +
msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
hsr_nl_nodedown(hsr, node->macaddress_A);
list_del_rcu(&node->mac_list);
/* Note that we need to free this entry later: */
kfree_rcu(node, rcu_head);
}
}
spin_unlock_bh(&hsr->list_lock);
/* Restart timer */
mod_timer(&hsr->prune_timer,
jiffies + msecs_to_jiffies(PRUNE_PERIOD));
}
void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
if (!_pos) {
node = list_first_or_null_rcu(&hsr->node_db,
struct hsr_node, mac_list);
if (node)
ether_addr_copy(addr, node->macaddress_A);
return node;
}
node = _pos;
list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
ether_addr_copy(addr, node->macaddress_A);
return node;
}
return NULL;
}
int hsr_get_node_data(struct hsr_priv *hsr,
const unsigned char *addr,
unsigned char addr_b[ETH_ALEN],
unsigned int *addr_b_ifindex,
int *if1_age,
u16 *if1_seq,
int *if2_age,
u16 *if2_seq)
{
struct hsr_node *node;
struct hsr_port *port;
unsigned long tdiff;
rcu_read_lock();
node = find_node_by_addr_A(&hsr->node_db, addr);
if (!node) {
rcu_read_unlock();
return -ENOENT; /* No such entry */
}
ether_addr_copy(addr_b, node->macaddress_B);
tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A];
if (node->time_in_stale[HSR_PT_SLAVE_A])
*if1_age = INT_MAX;
#if HZ <= MSEC_PER_SEC
else if (tdiff > msecs_to_jiffies(INT_MAX))
*if1_age = INT_MAX;
#endif
else
*if1_age = jiffies_to_msecs(tdiff);
tdiff = jiffies - node->time_in[HSR_PT_SLAVE_B];
if (node->time_in_stale[HSR_PT_SLAVE_B])
*if2_age = INT_MAX;
#if HZ <= MSEC_PER_SEC
else if (tdiff > msecs_to_jiffies(INT_MAX))
*if2_age = INT_MAX;
#endif
else
*if2_age = jiffies_to_msecs(tdiff);
/* Present sequence numbers as if they were incoming on interface */
*if1_seq = node->seq_out[HSR_PT_SLAVE_B];
*if2_seq = node->seq_out[HSR_PT_SLAVE_A];
if (node->addr_B_port != HSR_PT_NONE) {
port = hsr_port_get_hsr(hsr, node->addr_B_port);
*addr_b_ifindex = port->dev->ifindex;
} else {
*addr_b_ifindex = -1;
}
rcu_read_unlock();
return 0;
}