linux_dsm_epyc7002/net/tipc/discover.c
Jon Maloy adba75be0d tipc: fix lockdep warning when reinitilaizing sockets
We get the following warning:

[   47.926140] 32-bit node address hash set to 2010a0a
[   47.927202]
[   47.927433] ================================
[   47.928050] WARNING: inconsistent lock state
[   47.928661] 4.19.0+ #37 Tainted: G            E
[   47.929346] --------------------------------
[   47.929954] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[   47.930116] swapper/3/0 [HC0[0]:SC1[3]:HE1:SE0] takes:
[   47.930116] 00000000af8bc31e (&(&ht->lock)->rlock){+.?.}, at: rhashtable_walk_enter+0x36/0xb0
[   47.930116] {SOFTIRQ-ON-W} state was registered at:
[   47.930116]   _raw_spin_lock+0x29/0x60
[   47.930116]   rht_deferred_worker+0x556/0x810
[   47.930116]   process_one_work+0x1f5/0x540
[   47.930116]   worker_thread+0x64/0x3e0
[   47.930116]   kthread+0x112/0x150
[   47.930116]   ret_from_fork+0x3a/0x50
[   47.930116] irq event stamp: 14044
[   47.930116] hardirqs last  enabled at (14044): [<ffffffff9a07fbba>] __local_bh_enable_ip+0x7a/0xf0
[   47.938117] hardirqs last disabled at (14043): [<ffffffff9a07fb81>] __local_bh_enable_ip+0x41/0xf0
[   47.938117] softirqs last  enabled at (14028): [<ffffffff9a0803ee>] irq_enter+0x5e/0x60
[   47.938117] softirqs last disabled at (14029): [<ffffffff9a0804a5>] irq_exit+0xb5/0xc0
[   47.938117]
[   47.938117] other info that might help us debug this:
[   47.938117]  Possible unsafe locking scenario:
[   47.938117]
[   47.938117]        CPU0
[   47.938117]        ----
[   47.938117]   lock(&(&ht->lock)->rlock);
[   47.938117]   <Interrupt>
[   47.938117]     lock(&(&ht->lock)->rlock);
[   47.938117]
[   47.938117]  *** DEADLOCK ***
[   47.938117]
[   47.938117] 2 locks held by swapper/3/0:
[   47.938117]  #0: 0000000062c64f90 ((&d->timer)){+.-.}, at: call_timer_fn+0x5/0x280
[   47.938117]  #1: 00000000ee39619c (&(&d->lock)->rlock){+.-.}, at: tipc_disc_timeout+0xc8/0x540 [tipc]
[   47.938117]
[   47.938117] stack backtrace:
[   47.938117] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G            E     4.19.0+ #37
[   47.938117] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   47.938117] Call Trace:
[   47.938117]  <IRQ>
[   47.938117]  dump_stack+0x5e/0x8b
[   47.938117]  print_usage_bug+0x1ed/0x1ff
[   47.938117]  mark_lock+0x5b5/0x630
[   47.938117]  __lock_acquire+0x4c0/0x18f0
[   47.938117]  ? lock_acquire+0xa6/0x180
[   47.938117]  lock_acquire+0xa6/0x180
[   47.938117]  ? rhashtable_walk_enter+0x36/0xb0
[   47.938117]  _raw_spin_lock+0x29/0x60
[   47.938117]  ? rhashtable_walk_enter+0x36/0xb0
[   47.938117]  rhashtable_walk_enter+0x36/0xb0
[   47.938117]  tipc_sk_reinit+0xb0/0x410 [tipc]
[   47.938117]  ? mark_held_locks+0x6f/0x90
[   47.938117]  ? __local_bh_enable_ip+0x7a/0xf0
[   47.938117]  ? lockdep_hardirqs_on+0x20/0x1a0
[   47.938117]  tipc_net_finalize+0xbf/0x180 [tipc]
[   47.938117]  tipc_disc_timeout+0x509/0x540 [tipc]
[   47.938117]  ? call_timer_fn+0x5/0x280
[   47.938117]  ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc]
[   47.938117]  ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc]
[   47.938117]  call_timer_fn+0xa1/0x280
[   47.938117]  ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc]
[   47.938117]  run_timer_softirq+0x1f2/0x4d0
[   47.938117]  __do_softirq+0xfc/0x413
[   47.938117]  irq_exit+0xb5/0xc0
[   47.938117]  smp_apic_timer_interrupt+0xac/0x210
[   47.938117]  apic_timer_interrupt+0xf/0x20
[   47.938117]  </IRQ>
[   47.938117] RIP: 0010:default_idle+0x1c/0x140
[   47.938117] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 41 54 55 53 65 8b 2d d8 2b 74 65 0f 1f 44 00 00 e8 c6 2c 8b ff fb f4 <65> 8b 2d c5 2b 74 65 0f 1f 44 00 00 5b 5d 41 5c c3 65 8b 05 b4 2b
[   47.938117] RSP: 0018:ffffaf6ac0207ec8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13
[   47.938117] RAX: ffff8f5b3735e200 RBX: 0000000000000003 RCX: 0000000000000001
[   47.938117] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff8f5b3735e200
[   47.938117] RBP: 0000000000000003 R08: 0000000000000001 R09: 0000000000000000
[   47.938117] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[   47.938117] R13: 0000000000000000 R14: ffff8f5b3735e200 R15: ffff8f5b3735e200
[   47.938117]  ? default_idle+0x1a/0x140
[   47.938117]  do_idle+0x1bc/0x280
[   47.938117]  cpu_startup_entry+0x19/0x20
[   47.938117]  start_secondary+0x187/0x1c0
[   47.938117]  secondary_startup_64+0xa4/0xb0

The reason seems to be that tipc_net_finalize()->tipc_sk_reinit() is
calling the function rhashtable_walk_enter() within a timer interrupt.
We fix this by executing tipc_net_finalize() in work queue context.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-17 22:01:31 -08:00

416 lines
13 KiB
C

/*
* net/tipc/discover.c
*
* Copyright (c) 2003-2006, 2014-2018, Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "core.h"
#include "node.h"
#include "discover.h"
/* min delay during bearer start up */
#define TIPC_DISC_INIT msecs_to_jiffies(125)
/* max delay if bearer has no links */
#define TIPC_DISC_FAST msecs_to_jiffies(1000)
/* max delay if bearer has links */
#define TIPC_DISC_SLOW msecs_to_jiffies(60000)
/* indicates no timer in use */
#define TIPC_DISC_INACTIVE 0xffffffff
/**
* struct tipc_discoverer - information about an ongoing link setup request
* @bearer_id: identity of bearer issuing requests
* @net: network namespace instance
* @dest: destination address for request messages
* @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
* @lock: spinlock for controlling access to requests
* @skb: request message to be (repeatedly) sent
* @timer: timer governing period between requests
* @timer_intv: current interval between requests (in ms)
*/
struct tipc_discoverer {
u32 bearer_id;
struct tipc_media_addr dest;
struct net *net;
u32 domain;
int num_nodes;
spinlock_t lock;
struct sk_buff *skb;
struct timer_list timer;
unsigned long timer_intv;
};
/**
* tipc_disc_init_msg - initialize a link setup message
* @net: the applicable net namespace
* @type: message type (request or response)
* @b: ptr to bearer issuing message
*/
static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
u32 mtyp, struct tipc_bearer *b)
{
struct tipc_net *tn = tipc_net(net);
u32 dest_domain = b->domain;
struct tipc_msg *hdr;
hdr = buf_msg(skb);
tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
MAX_H_SIZE, dest_domain);
msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN);
msg_set_non_seq(hdr, 1);
msg_set_node_sig(hdr, tn->random);
msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), &b->addr);
msg_set_node_id(hdr, tipc_own_id(net));
}
static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst,
u32 src, u32 sugg_addr,
struct tipc_media_addr *maddr,
struct tipc_bearer *b)
{
struct tipc_msg *hdr;
struct sk_buff *skb;
skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
if (!skb)
return;
hdr = buf_msg(skb);
tipc_disc_init_msg(net, skb, mtyp, b);
msg_set_sugg_node_addr(hdr, sugg_addr);
msg_set_dest_domain(hdr, dst);
tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
}
/**
* disc_dupl_alert - issue node address duplication alert
* @b: pointer to bearer detecting duplication
* @node_addr: duplicated node address
* @media_addr: media address advertised by duplicated node
*/
static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
struct tipc_media_addr *media_addr)
{
char media_addr_str[64];
tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
media_addr);
pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr,
media_addr_str, b->name);
}
/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
* Returns true if message should be dropped by caller, i.e., if it is a
* trial message or we are inside trial period. Otherwise false.
*/
static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
struct tipc_media_addr *maddr,
struct tipc_bearer *b,
u32 dst, u32 src,
u32 sugg_addr,
u8 *peer_id,
int mtyp)
{
struct net *net = d->net;
struct tipc_net *tn = tipc_net(net);
bool trial = time_before(jiffies, tn->addr_trial_end);
u32 self = tipc_own_addr(net);
if (mtyp == DSC_TRIAL_FAIL_MSG) {
if (!trial)
return true;
/* Ignore if somebody else already gave new suggestion */
if (dst != tn->trial_addr)
return true;
/* Otherwise update trial address and restart trial period */
tn->trial_addr = sugg_addr;
msg_set_prevnode(buf_msg(d->skb), sugg_addr);
tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
return true;
}
/* Apply trial address if we just left trial period */
if (!trial && !self) {
tipc_sched_net_finalize(net, tn->trial_addr);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
/* Accept regular link requests/responses only after trial period */
if (mtyp != DSC_TRIAL_MSG)
return trial;
sugg_addr = tipc_node_try_addr(net, peer_id, src);
if (sugg_addr)
tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src,
self, sugg_addr, maddr, b);
return true;
}
/**
* tipc_disc_rcv - handle incoming discovery message (request or response)
* @net: applicable net namespace
* @skb: buffer containing message
* @b: bearer that message arrived on
*/
void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
struct tipc_bearer *b)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_msg *hdr = buf_msg(skb);
u16 caps = msg_node_capabilities(hdr);
bool legacy = tn->legacy_addr_format;
u32 sugg = msg_sugg_node_addr(hdr);
u32 signature = msg_node_sig(hdr);
u8 peer_id[NODE_ID_LEN] = {0,};
u32 dst = msg_dest_domain(hdr);
u32 net_id = msg_bc_netid(hdr);
struct tipc_media_addr maddr;
u32 src = msg_prevnode(hdr);
u32 mtyp = msg_type(hdr);
bool dupl_addr = false;
bool respond = false;
u32 self;
int err;
skb_linearize(skb);
hdr = buf_msg(skb);
if (caps & TIPC_NODE_ID128)
memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN);
else
sprintf(peer_id, "%x", src);
err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
kfree_skb(skb);
if (err || maddr.broadcast) {
pr_warn_ratelimited("Rcv corrupt discovery message\n");
return;
}
/* Ignore discovery messages from own node */
if (!memcmp(&maddr, &b->addr, sizeof(maddr)))
return;
if (net_id != tn->net_id)
return;
if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst,
src, sugg, peer_id, mtyp))
return;
self = tipc_own_addr(net);
/* Message from somebody using this node's address */
if (in_own_node(net, src)) {
disc_dupl_alert(b, self, &maddr);
return;
}
if (!tipc_in_scope(legacy, dst, self))
return;
if (!tipc_in_scope(legacy, b->domain, src))
return;
tipc_node_check_dest(net, src, peer_id, b, caps, signature,
&maddr, &respond, &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
if (!respond)
return;
if (mtyp != DSC_REQ_MSG)
return;
tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b);
}
/* tipc_disc_add_dest - increment set of discovered nodes
*/
void tipc_disc_add_dest(struct tipc_discoverer *d)
{
spin_lock_bh(&d->lock);
d->num_nodes++;
spin_unlock_bh(&d->lock);
}
/* tipc_disc_remove_dest - decrement set of discovered nodes
*/
void tipc_disc_remove_dest(struct tipc_discoverer *d)
{
int intv, num;
spin_lock_bh(&d->lock);
d->num_nodes--;
num = d->num_nodes;
intv = d->timer_intv;
if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) {
d->timer_intv = TIPC_DISC_INIT;
mod_timer(&d->timer, jiffies + d->timer_intv);
}
spin_unlock_bh(&d->lock);
}
/* tipc_disc_timeout - send a periodic link setup request
* Called whenever a link setup request timer associated with a bearer expires.
* - Keep doubling time between sent request until limit is reached;
* - Hold at fast polling rate if we don't have any associated nodes
* - Otherwise hold at slow polling rate
*/
static void tipc_disc_timeout(struct timer_list *t)
{
struct tipc_discoverer *d = from_timer(d, t, timer);
struct tipc_net *tn = tipc_net(d->net);
struct tipc_media_addr maddr;
struct sk_buff *skb = NULL;
struct net *net = d->net;
u32 bearer_id;
spin_lock_bh(&d->lock);
/* Stop searching if only desired node has been found */
if (tipc_node(d->domain) && d->num_nodes) {
d->timer_intv = TIPC_DISC_INACTIVE;
goto exit;
}
/* Did we just leave trial period ? */
if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
spin_unlock_bh(&d->lock);
tipc_sched_net_finalize(net, tn->trial_addr);
return;
}
/* Adjust timeout interval according to discovery phase */
if (time_before(jiffies, tn->addr_trial_end)) {
d->timer_intv = TIPC_DISC_INIT;
} else {
d->timer_intv *= 2;
if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
d->timer_intv = TIPC_DISC_SLOW;
else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
d->timer_intv = TIPC_DISC_FAST;
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
}
mod_timer(&d->timer, jiffies + d->timer_intv);
memcpy(&maddr, &d->dest, sizeof(maddr));
skb = skb_clone(d->skb, GFP_ATOMIC);
bearer_id = d->bearer_id;
exit:
spin_unlock_bh(&d->lock);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);
}
/**
* tipc_disc_create - create object to send periodic link setup requests
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
* @dest: destination address for request messages
* @dest_domain: network domain to which links can be established
*
* Returns 0 if successful, otherwise -errno.
*/
int tipc_disc_create(struct net *net, struct tipc_bearer *b,
struct tipc_media_addr *dest, struct sk_buff **skb)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_discoverer *d;
d = kmalloc(sizeof(*d), GFP_ATOMIC);
if (!d)
return -ENOMEM;
d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
if (!d->skb) {
kfree(d);
return -ENOMEM;
}
tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
/* Do we need an address trial period first ? */
if (!tipc_own_addr(net)) {
tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
}
memcpy(&d->dest, dest, sizeof(*dest));
d->net = net;
d->bearer_id = b->identity;
d->domain = b->domain;
d->num_nodes = 0;
d->timer_intv = TIPC_DISC_INIT;
spin_lock_init(&d->lock);
timer_setup(&d->timer, tipc_disc_timeout, 0);
mod_timer(&d->timer, jiffies + d->timer_intv);
b->disc = d;
*skb = skb_clone(d->skb, GFP_ATOMIC);
return 0;
}
/**
* tipc_disc_delete - destroy object sending periodic link setup requests
* @d: ptr to link duest structure
*/
void tipc_disc_delete(struct tipc_discoverer *d)
{
del_timer_sync(&d->timer);
kfree_skb(d->skb);
kfree(d);
}
/**
* tipc_disc_reset - reset object to send periodic link setup requests
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
* @dest_domain: network domain to which links can be established
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
struct tipc_discoverer *d = b->disc;
struct tipc_media_addr maddr;
struct sk_buff *skb;
spin_lock_bh(&d->lock);
tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
d->net = net;
d->bearer_id = b->identity;
d->domain = b->domain;
d->num_nodes = 0;
d->timer_intv = TIPC_DISC_INIT;
memcpy(&maddr, &d->dest, sizeof(maddr));
mod_timer(&d->timer, jiffies + d->timer_intv);
skb = skb_clone(d->skb, GFP_ATOMIC);
spin_unlock_bh(&d->lock);
if (skb)
tipc_bearer_xmit_skb(net, b->identity, skb, &maddr);
}