linux_dsm_epyc7002/include/linux/rtnetlink.h
Mahesh Bandewar 395eea6ccf rtnetlink: delay RTM_DELLINK notification until after ndo_uninit()
The commit 56bfa7ee7c ("unregister_netdevice : move RTM_DELLINK to
until after ndo_uninit") tried to do this ealier but while doing so
it created a problem. Unfortunately the delayed rtmsg_ifinfo() also
delayed call to fill_info(). So this translated into asking driver
to remove private state and then query it's private state. This
could have catastropic consequences.

This change breaks the rtmsg_ifinfo() into two parts - one takes the
precise snapshot of the device by called fill_info() before calling
the ndo_uninit() and the second part sends the notification using
collected snapshot.

It was brought to notice when last link is deleted from an ipvlan device
when it has free-ed the port and the subsequent .fill_info() call is
trying to get the info from the port.

kernel: [  255.139429] ------------[ cut here ]------------
kernel: [  255.139439] WARNING: CPU: 12 PID: 11173 at net/core/rtnetlink.c:2238 rtmsg_ifinfo+0x100/0x110()
kernel: [  255.139493] Modules linked in: ipvlan bonding w1_therm ds2482 wire cdc_acm ehci_pci ehci_hcd i2c_dev i2c_i801 i2c_core msr cpuid bnx2x ptp pps_core mdio libcrc32c
kernel: [  255.139513] CPU: 12 PID: 11173 Comm: ip Not tainted 3.18.0-smp-DEV #167
kernel: [  255.139514] Hardware name: Intel RML,PCH/Ibis_QC_18, BIOS 1.0.10 05/15/2012
kernel: [  255.139515]  0000000000000009 ffff880851b6b828 ffffffff815d87f4 00000000000000e0
kernel: [  255.139516]  0000000000000000 ffff880851b6b868 ffffffff8109c29c 0000000000000000
kernel: [  255.139518]  00000000ffffffa6 00000000000000d0 ffffffff81aaf580 0000000000000011
kernel: [  255.139520] Call Trace:
kernel: [  255.139527]  [<ffffffff815d87f4>] dump_stack+0x46/0x58
kernel: [  255.139531]  [<ffffffff8109c29c>] warn_slowpath_common+0x8c/0xc0
kernel: [  255.139540]  [<ffffffff8109c2ea>] warn_slowpath_null+0x1a/0x20
kernel: [  255.139544]  [<ffffffff8150d570>] rtmsg_ifinfo+0x100/0x110
kernel: [  255.139547]  [<ffffffff814f78b5>] rollback_registered_many+0x1d5/0x2d0
kernel: [  255.139549]  [<ffffffff814f79cf>] unregister_netdevice_many+0x1f/0xb0
kernel: [  255.139551]  [<ffffffff8150acab>] rtnl_dellink+0xbb/0x110
kernel: [  255.139553]  [<ffffffff8150da90>] rtnetlink_rcv_msg+0xa0/0x240
kernel: [  255.139557]  [<ffffffff81329283>] ? rhashtable_lookup_compare+0x43/0x80
kernel: [  255.139558]  [<ffffffff8150d9f0>] ? __rtnl_unlock+0x20/0x20
kernel: [  255.139562]  [<ffffffff8152cb11>] netlink_rcv_skb+0xb1/0xc0
kernel: [  255.139563]  [<ffffffff8150a495>] rtnetlink_rcv+0x25/0x40
kernel: [  255.139565]  [<ffffffff8152c398>] netlink_unicast+0x178/0x230
kernel: [  255.139567]  [<ffffffff8152c75f>] netlink_sendmsg+0x30f/0x420
kernel: [  255.139571]  [<ffffffff814e0b0c>] sock_sendmsg+0x9c/0xd0
kernel: [  255.139575]  [<ffffffff811d1d7f>] ? rw_copy_check_uvector+0x6f/0x130
kernel: [  255.139577]  [<ffffffff814e11c9>] ? copy_msghdr_from_user+0x139/0x1b0
kernel: [  255.139578]  [<ffffffff814e1774>] ___sys_sendmsg+0x304/0x310
kernel: [  255.139581]  [<ffffffff81198723>] ? handle_mm_fault+0xca3/0xde0
kernel: [  255.139585]  [<ffffffff811ebc4c>] ? destroy_inode+0x3c/0x70
kernel: [  255.139589]  [<ffffffff8108e6ec>] ? __do_page_fault+0x20c/0x500
kernel: [  255.139597]  [<ffffffff811e8336>] ? dput+0xb6/0x190
kernel: [  255.139606]  [<ffffffff811f05f6>] ? mntput+0x26/0x40
kernel: [  255.139611]  [<ffffffff811d2b94>] ? __fput+0x174/0x1e0
kernel: [  255.139613]  [<ffffffff814e2129>] __sys_sendmsg+0x49/0x90
kernel: [  255.139615]  [<ffffffff814e2182>] SyS_sendmsg+0x12/0x20
kernel: [  255.139617]  [<ffffffff815df092>] system_call_fastpath+0x12/0x17
kernel: [  255.139619] ---[ end trace 5e6703e87d984f6b ]---

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Reported-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Cc: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-09 13:36:57 -05:00

114 lines
3.7 KiB
C

#ifndef __LINUX_RTNETLINK_H
#define __LINUX_RTNETLINK_H
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/wait.h>
#include <uapi/linux/rtnetlink.h>
extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
u32 group, struct nlmsghdr *nlh, gfp_t flags);
extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
u32 id, long expires, u32 error);
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned change, gfp_t flags);
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
gfp_t flags);
/* RTNL is used as a global lock for all changes to network configuration */
extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
extern wait_queue_head_t netdev_unregistering_wq;
extern struct mutex net_mutex;
#ifdef CONFIG_PROVE_LOCKING
extern int lockdep_rtnl_is_held(void);
#else
static inline int lockdep_rtnl_is_held(void)
{
return 1;
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
/**
* rcu_dereference_rtnl - rcu_dereference with debug checking
* @p: The pointer to read, prior to dereferencing
*
* Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
* or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference()
*/
#define rcu_dereference_rtnl(p) \
rcu_dereference_check(p, lockdep_rtnl_is_held())
/**
* rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking
* @p: The pointer to read, prior to dereference
*
* Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh()
* or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference_bh()
*/
#define rcu_dereference_bh_rtnl(p) \
rcu_dereference_bh_check(p, lockdep_rtnl_is_held())
/**
* rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
* @p: The pointer to read, prior to dereferencing
*
* Return the value of the specified RCU-protected pointer, but omit
* both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
* caller holds RTNL.
*/
#define rtnl_dereference(p) \
rcu_dereference_protected(p, lockdep_rtnl_is_held())
static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
{
return rtnl_dereference(dev->ingress_queue);
}
extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
extern void rtnetlink_init(void);
extern void __rtnl_unlock(void);
#define ASSERT_RTNL() do { \
if (unlikely(!rtnl_is_locked())) { \
printk(KERN_ERR "RTNL: assertion failed at %s (%d)\n", \
__FILE__, __LINE__); \
dump_stack(); \
} \
} while(0)
extern int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev,
int idx);
extern int ndo_dflt_fdb_add(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr,
u16 vid,
u16 flags);
extern int ndo_dflt_fdb_del(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr,
u16 vid);
extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u16 mode,
u32 flags, u32 mask);
#endif /* __LINUX_RTNETLINK_H */