From 0e63208915a8d7590d0a6218dadb2a6a00ac705a Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@gmail.com>
Date: Mon, 4 Mar 2019 23:26:10 +0100
Subject: [PATCH 01/72] tipc: fix RDM/DGRAM connect() regression

Fix regression bug introduced in
commit 365ad353c256 ("tipc: reduce risk of user starvation during link
congestion")

Only signal -EDESTADDRREQ for RDM/DGRAM if we don't have a cached
sockaddr.

Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion")
Signed-off-by: Erik Hugne <erik.hugne@gmail.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e482b342bfa8..3274ef625dba 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1333,7 +1333,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 
 	if (unlikely(!dest)) {
 		dest = &tsk->peer;
-		if (!syn || dest->family != AF_TIPC)
+		if (!syn && dest->family != AF_TIPC)
 			return -EDESTADDRREQ;
 	}
 

From 4177c5d94264b57f426ef5c45a788808d1a1e536 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Tue, 5 Mar 2019 08:29:28 +0800
Subject: [PATCH 02/72] net/sched: act_tunnel_key: Fix double free dst_cache

dst_cache_destroy will be called in dst_release

dst_release-->dst_destroy_rcu-->dst_destroy-->metadata_dst_free
-->dst_cache_destroy

It should not call dst_cache_destroy before dst_release

Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support")
Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3beb4717d3b7..7c6591b991d5 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,14 +201,9 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 {
 	if (!p)
 		return;
-	if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-#ifdef CONFIG_DST_CACHE
-		struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
-
-		dst_cache_destroy(&info->dst_cache);
-#endif
+	if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
 		dst_release(&p->tcft_enc_metadata->dst);
-	}
+
 	kfree_rcu(p, rcu);
 }
 
@@ -338,7 +333,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 						  &metadata->u.tun_info,
 						  opts_len, extack);
 			if (ret < 0)
-				goto release_dst_cache;
+				goto release_tun_meta;
 		}
 
 		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
@@ -354,14 +349,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 				     &act_tunnel_key_ops, bind, true);
 		if (ret) {
 			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
-			goto release_dst_cache;
+			goto release_tun_meta;
 		}
 
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
 		NL_SET_ERR_MSG(extack, "TC IDR already exists");
 		ret = -EEXIST;
-		goto release_dst_cache;
+		goto release_tun_meta;
 	}
 
 	t = to_tunnel_key(*a);
@@ -371,7 +366,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
 		ret = -ENOMEM;
 		exists = true;
-		goto release_dst_cache;
+		goto release_tun_meta;
 	}
 	params_new->tcft_action = parm->t_action;
 	params_new->tcft_enc_metadata = metadata;
@@ -388,12 +383,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 
 	return ret;
 
-release_dst_cache:
-#ifdef CONFIG_DST_CACHE
-	if (metadata)
-		dst_cache_destroy(&metadata->u.tun_info.dst_cache);
 release_tun_meta:
-#endif
 	if (metadata)
 		dst_release(&metadata->dst);
 

From f4772dee101c7ac66e395d07b3140d457901fa18 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 6 Mar 2019 11:12:34 +0300
Subject: [PATCH 03/72] net: hns3: Fix a logical vs bitwise typo

There were a couple logical ORs accidentally mixed in with the bitwise
ORs.

Fixes: e8149933b1fa ("net: hns3: remove hnae3_get_bit in data path")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 3cb43b1f1c2e..1e4efc47c7a5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2321,8 +2321,8 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
 	if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
 		return;
 
-	if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) ||
-				 BIT(HNS3_RXD_OL3E_B) ||
+	if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
+				 BIT(HNS3_RXD_OL3E_B) |
 				 BIT(HNS3_RXD_OL4E_B)))) {
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.l3l4_csum_err++;

From 22c74764aa2943ecdf9f07c900d8a9c8ba6c9265 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 6 Mar 2019 10:42:53 +0100
Subject: [PATCH 04/72] ipv4/route: fail early when inet dev is missing

If a non local multicast packet reaches ip_route_input_rcu() while
the ingress device IPv4 private data (in_dev) is NULL, we end up
doing a NULL pointer dereference in IN_DEV_MFORWARD().

Since the later call to ip_route_input_mc() is going to fail if
!in_dev, we can fail early in such scenario and avoid the dangerous
code path.

v1 -> v2:
 - clarified the commit message, no code changes

Reported-by: Tianhao Zhao <tizhao@redhat.com>
Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 738ff0a1a048..8ca3642f0d9b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2149,12 +2149,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		int our = 0;
 		int err = -EINVAL;
 
-		if (in_dev)
-			our = ip_check_mc_rcu(in_dev, daddr, saddr,
-					      ip_hdr(skb)->protocol);
+		if (!in_dev)
+			return err;
+		our = ip_check_mc_rcu(in_dev, daddr, saddr,
+				      ip_hdr(skb)->protocol);
 
 		/* check l3 master if no match yet */
-		if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+		if (!our && netif_is_l3_slave(dev)) {
 			struct in_device *l3_in_dev;
 
 			l3_in_dev = __in_dev_get_rcu(skb->dev);

From f4b3ec4e6aa1a2ca437905a519ae08e8cf6af754 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 6 Mar 2019 10:25:42 +0000
Subject: [PATCH 05/72] iptunnel: NULL pointer deref for ip_md_tunnel_xmit

Naresh Kamboju noted the following oops during execution of selftest
tools/testing/selftests/bpf/test_tunnel.sh on x86_64:

[  274.120445] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000000
[  274.128285] #PF error: [INSTR]
[  274.131351] PGD 8000000414a0e067 P4D 8000000414a0e067 PUD 3b6334067 PMD 0
[  274.138241] Oops: 0010 [#1] SMP PTI
[  274.141734] CPU: 1 PID: 11464 Comm: ping Not tainted
5.0.0-rc4-next-20190129 #1
[  274.149046] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[  274.156526] RIP: 0010:          (null)
[  274.160280] Code: Bad RIP value.
[  274.163509] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286
[  274.168726] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000
[  274.175851] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0
[  274.182974] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c
[  274.190098] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000
[  274.197222] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400
[  274.204346] FS:  00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000)
knlGS:0000000000000000
[  274.212424] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  274.218162] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0
[  274.225292] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  274.232416] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  274.239541] Call Trace:
[  274.241988]  ? tnl_update_pmtu+0x296/0x3b0
[  274.246085]  ip_md_tunnel_xmit+0x1bc/0x520
[  274.250176]  gre_fb_xmit+0x330/0x390
[  274.253754]  gre_tap_xmit+0x128/0x180
[  274.257414]  dev_hard_start_xmit+0xb7/0x300
[  274.261598]  sch_direct_xmit+0xf6/0x290
[  274.265430]  __qdisc_run+0x15d/0x5e0
[  274.269007]  __dev_queue_xmit+0x2c5/0xc00
[  274.273011]  ? dev_queue_xmit+0x10/0x20
[  274.276842]  ? eth_header+0x2b/0xc0
[  274.280326]  dev_queue_xmit+0x10/0x20
[  274.283984]  ? dev_queue_xmit+0x10/0x20
[  274.287813]  arp_xmit+0x1a/0xf0
[  274.290952]  arp_send_dst.part.19+0x46/0x60
[  274.295138]  arp_solicit+0x177/0x6b0
[  274.298708]  ? mod_timer+0x18e/0x440
[  274.302281]  neigh_probe+0x57/0x70
[  274.305684]  __neigh_event_send+0x197/0x2d0
[  274.309862]  neigh_resolve_output+0x18c/0x210
[  274.314212]  ip_finish_output2+0x257/0x690
[  274.318304]  ip_finish_output+0x219/0x340
[  274.322314]  ? ip_finish_output+0x219/0x340
[  274.326493]  ip_output+0x76/0x240
[  274.329805]  ? ip_fragment.constprop.53+0x80/0x80
[  274.334510]  ip_local_out+0x3f/0x70
[  274.337992]  ip_send_skb+0x19/0x40
[  274.341391]  ip_push_pending_frames+0x33/0x40
[  274.345740]  raw_sendmsg+0xc15/0x11d0
[  274.349403]  ? __might_fault+0x85/0x90
[  274.353151]  ? _copy_from_user+0x6b/0xa0
[  274.357070]  ? rw_copy_check_uvector+0x54/0x130
[  274.361604]  inet_sendmsg+0x42/0x1c0
[  274.365179]  ? inet_sendmsg+0x42/0x1c0
[  274.368937]  sock_sendmsg+0x3e/0x50
[  274.372460]  ___sys_sendmsg+0x26f/0x2d0
[  274.376293]  ? lock_acquire+0x95/0x190
[  274.380043]  ? __handle_mm_fault+0x7ce/0xb70
[  274.384307]  ? lock_acquire+0x95/0x190
[  274.388053]  ? __audit_syscall_entry+0xdd/0x130
[  274.392586]  ? ktime_get_coarse_real_ts64+0x64/0xc0
[  274.397461]  ? __audit_syscall_entry+0xdd/0x130
[  274.401989]  ? trace_hardirqs_on+0x4c/0x100
[  274.406173]  __sys_sendmsg+0x63/0xa0
[  274.409744]  ? __sys_sendmsg+0x63/0xa0
[  274.413488]  __x64_sys_sendmsg+0x1f/0x30
[  274.417405]  do_syscall_64+0x55/0x190
[  274.421064]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  274.426113] RIP: 0033:0x7ff4ae0e6e87
[  274.429686] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00
00 00 00 8b 05 ca d9 2b 00 48 63 d2 48 63 ff 85 c0 75 10 b8 2e 00 00
00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 53 48 89 f3 48 83 ec 10 48 89 7c
24 08
[  274.448422] RSP: 002b:00007ffcd9b76db8 EFLAGS: 00000246 ORIG_RAX:
000000000000002e
[  274.455978] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007ff4ae0e6e87
[  274.463104] RDX: 0000000000000000 RSI: 00000000006092e0 RDI: 0000000000000003
[  274.470228] RBP: 0000000000000000 R08: 00007ffcd9bc40a0 R09: 00007ffcd9bc4080
[  274.477349] R10: 000000000000060a R11: 0000000000000246 R12: 0000000000000003
[  274.484475] R13: 0000000000000016 R14: 00007ffcd9b77fa0 R15: 00007ffcd9b78da4
[  274.491602] Modules linked in: cls_bpf sch_ingress iptable_filter
ip_tables algif_hash af_alg x86_pkg_temp_thermal fuse [last unloaded:
test_bpf]
[  274.504634] CR2: 0000000000000000
[  274.507976] ---[ end trace 196d18386545eae1 ]---
[  274.512588] RIP: 0010:          (null)
[  274.516334] Code: Bad RIP value.
[  274.519557] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286
[  274.524775] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000
[  274.531921] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0
[  274.539082] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c
[  274.546205] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000
[  274.553329] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400
[  274.560456] FS:  00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000)
knlGS:0000000000000000
[  274.568541] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  274.574277] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0
[  274.581403] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  274.588535] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  274.595658] Kernel panic - not syncing: Fatal exception in interrupt
[  274.602046] Kernel Offset: 0x14400000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[  274.612827] ---[ end Kernel panic - not syncing: Fatal exception in
interrupt ]---
[  274.620387] ------------[ cut here ]------------

I'm also seeing the same failure on x86_64, and it reproduces
consistently.

>From poking around it looks like the skb's dst entry is being used
to calculate the mtu in:

mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

...but because that dst_entry  has an "ops" value set to md_dst_ops,
the various ops (including mtu) are not set:

crash> struct sk_buff._skb_refdst ffff928f87447700 -x
      _skb_refdst = 0xffffcd6fbf5ea590
crash> struct dst_entry.ops 0xffffcd6fbf5ea590
  ops = 0xffffffffa0193800
crash> struct dst_ops.mtu 0xffffffffa0193800
  mtu = 0x0
crash>

I confirmed that the dst entry also has dst->input set to
dst_md_discard, so it looks like it's an entry that's been
initialized via __metadata_dst_init alright.

I think the fix here is to use skb_valid_dst(skb) - it checks
for  DST_METADATA also, and with that fix in place, the
problem - which was previously 100% reproducible - disappears.

The below patch resolves the panic and all bpf tunnel tests pass
without incident.

Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2756fb725bf0..a5d8cad18ead 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -515,9 +515,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 					- sizeof(struct iphdr) - tunnel_hlen;
 	else
-		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	skb_dst_update_pmtu(skb, mtu);
+	if (skb_valid_dst(skb))
+		skb_dst_update_pmtu(skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		if (!skb_is_gso(skb) &&
@@ -530,9 +531,11 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+		struct rt6_info *rt6;
 		__be32 daddr;
 
+		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+					   NULL;
 		daddr = md ? dst : tunnel->parms.iph.daddr;
 
 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&

From 27da0d2ef998e222a876c0cec72aa7829a626266 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 6 Mar 2019 11:52:36 +0100
Subject: [PATCH 06/72] appletalk: Fix compile regression

A bugfix just broke compilation of appletalk when CONFIG_SYSCTL
is disabled:

In file included from net/appletalk/ddp.c:65:
net/appletalk/ddp.c: In function 'atalk_init':
include/linux/atalk.h:164:34: error: expected expression before 'do'
 #define atalk_register_sysctl()  do { } while(0)
                                  ^~
net/appletalk/ddp.c:1934:7: note: in expansion of macro 'atalk_register_sysctl'
  rc = atalk_register_sysctl();

This is easier to avoid by using conventional inline functions
as stubs rather than macros. The header already has inline
functions for other purposes, so I'm changing over all the
macros for consistency.

Fixes: 6377f787aeb9 ("appletalk: Fix use-after-free in atalk_proc_exit")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 5a90f28d5ff2..d5cfc0b15b76 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -161,16 +161,26 @@ extern int sysctl_aarp_resolve_time;
 extern int atalk_register_sysctl(void);
 extern void atalk_unregister_sysctl(void);
 #else
-#define atalk_register_sysctl()		do { } while(0)
-#define atalk_unregister_sysctl()	do { } while(0)
+static inline int atalk_register_sysctl(void)
+{
+	return 0;
+}
+static inline void atalk_unregister_sysctl(void)
+{
+}
 #endif
 
 #ifdef CONFIG_PROC_FS
 extern int atalk_proc_init(void);
 extern void atalk_proc_exit(void);
 #else
-#define atalk_proc_init()	({ 0; })
-#define atalk_proc_exit()	do { } while(0)
+static inline int atalk_proc_init(void)
+{
+	return 0;
+}
+static inline void atalk_proc_exit(void)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
 #endif /* __LINUX_ATALK_H__ */

From 7b8376237618e4ddbf71277c8da6b2e0c57f0f70 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 6 Mar 2019 11:52:37 +0100
Subject: [PATCH 07/72] appletalk: Add atalk.h header files to MAINTAINERS file

Add the path names here so that git-send-email can pick up the
netdev@vger.kernel.org Cc line automatically for a patch that
only touches the headers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index fbbb96236d98..f470eb4aeb21 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1034,6 +1034,8 @@ L:	netdev@vger.kernel.org
 S:	Odd fixes
 F:	drivers/net/appletalk/
 F:	net/appletalk/
+F:	include/linux/atalk.h
+F:	include/uapi/linux/atalk.h
 
 APPLIED MICRO (APM) X-GENE DEVICE TREE SUPPORT
 M:	Duc Dang <dhdang@apm.com>

From a10674bf2406afc2554f9c7d31b2dc65d6a27fd9 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 6 Mar 2019 14:10:22 +0300
Subject: [PATCH 08/72] tcp: detecting the misuse of .sendpage for Slab objects

sendpage was not designed for processing of the Slab pages,
in some situations it can trigger BUG_ON on receiving side.

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ad07dd71063d..dbb08140cdc9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -943,6 +943,10 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
+	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+	    WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+		return -EINVAL;
+
 	/* Wait for a connection to finish. One exception is TCP Fast Open
 	 * (passive side) where data is allowed to be sent before a connection
 	 * is fully established.

From ecb3dea400d3beaf611ce76ac7a51d4230492cf2 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Wed, 6 Mar 2019 16:22:12 +0200
Subject: [PATCH 09/72] net: sched: flower: insert new filter to idr after
 setting its mask

When adding new filter to flower classifier, fl_change() inserts it to
handle_idr before initializing filter extensions and assigning it a mask.
Normally this ordering doesn't matter because all flower classifier ops
callbacks assume rtnl lock protection. However, when filter has an action
that doesn't have its kernel module loaded, rtnl lock is released before
call to request_module(). During this time the filter can be accessed bu
concurrent task before its initialization is completed, which can lead to a
crash.

Example case of NULL pointer dereference in concurrent dump:

Task 1                           Task 2

tc_new_tfilter()
 fl_change()
  idr_alloc_u32(fnew)
  fl_set_parms()
   tcf_exts_validate()
    tcf_action_init()
     tcf_action_init_1()
      rtnl_unlock()
      request_module()
      ...                        rtnl_lock()
      				 tc_dump_tfilter()
      				  tcf_chain_dump()
				   fl_walk()
				    idr_get_next_ul()
				    tcf_node_dump()
				     tcf_fill_node()
				      fl_dump()
				       mask = &f->mask->key; <- NULL ptr
      rtnl_lock()

Extension initialization and mask assignment don't depend on fnew->handle
that is allocated by idr_alloc_u32(). Move idr allocation code after action
creation and mask assignment in fl_change() to prevent concurrent access
to not fully initialized filter when rtnl lock is released to load action
module.

Fixes: 01683a146999 ("net: sched: refactor flower walk to iterate over idr")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 49 +++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 27300a3e76c7..c04247b403ed 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1348,6 +1348,24 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout;
 
+	if (tb[TCA_FLOWER_FLAGS]) {
+		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+
+		if (!tc_flags_valid(fnew->flags)) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+
+	err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
+			   tp->chain->tmplt_priv, extack);
+	if (err)
+		goto errout;
+
+	err = fl_check_assign_mask(head, fnew, fold, mask);
+	if (err)
+		goto errout;
+
 	if (!handle) {
 		handle = 1;
 		err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
@@ -1358,36 +1376,18 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 				    handle, GFP_KERNEL);
 	}
 	if (err)
-		goto errout;
+		goto errout_mask;
 	fnew->handle = handle;
 
-	if (tb[TCA_FLOWER_FLAGS]) {
-		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
-
-		if (!tc_flags_valid(fnew->flags)) {
-			err = -EINVAL;
-			goto errout_idr;
-		}
-	}
-
-	err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
-			   tp->chain->tmplt_priv, extack);
-	if (err)
-		goto errout_idr;
-
-	err = fl_check_assign_mask(head, fnew, fold, mask);
-	if (err)
-		goto errout_idr;
-
 	if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) {
 		err = -EEXIST;
-		goto errout_mask;
+		goto errout_idr;
 	}
 
 	err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
 				     fnew->mask->filter_ht_params);
 	if (err)
-		goto errout_mask;
+		goto errout_idr;
 
 	if (!tc_skip_hw(fnew->flags)) {
 		err = fl_hw_replace_filter(tp, fnew, extack);
@@ -1426,12 +1426,13 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
 			       fnew->mask->filter_ht_params);
 
-errout_mask:
-	fl_mask_put(head, fnew->mask, false);
-
 errout_idr:
 	if (!fold)
 		idr_remove(&head->handle_idr, fnew->handle);
+
+errout_mask:
+	fl_mask_put(head, fnew->mask, false);
+
 errout:
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);

From 6caabe7f197d3466d238f70915d65301f1716626 Mon Sep 17 00:00:00 2001
From: Mao Wenan <maowenan@huawei.com>
Date: Wed, 6 Mar 2019 22:45:01 +0800
Subject: [PATCH 10/72] net: hsr: fix memory leak in hsr_dev_finalize()

If hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER) failed to
add port, it directly returns res and forgets to free the node
that allocated in hsr_create_self_node(), and forgets to delete
the node->mac_list linked in hsr->self_node_db.

BUG: memory leak
unreferenced object 0xffff8881cfa0c780 (size 64):
  comm "syz-executor.0", pid 2077, jiffies 4294717969 (age 2415.377s)
  hex dump (first 32 bytes):
    e0 c7 a0 cf 81 88 ff ff 00 02 00 00 00 00 ad de  ................
    00 e6 49 cd 81 88 ff ff c0 9b 87 d0 81 88 ff ff  ..I.............
  backtrace:
    [<00000000e2ff5070>] hsr_dev_finalize+0x736/0x960 [hsr]
    [<000000003ed2e597>] hsr_newlink+0x2b2/0x3e0 [hsr]
    [<000000003fa8c6b6>] __rtnl_newlink+0xf1f/0x1600 net/core/rtnetlink.c:3182
    [<000000001247a7ad>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3240
    [<00000000e7d1b61d>] rtnetlink_rcv_msg+0x54e/0xb90 net/core/rtnetlink.c:5130
    [<000000005556bd3a>] netlink_rcv_skb+0x129/0x340 net/netlink/af_netlink.c:2477
    [<00000000741d5ee6>] netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
    [<00000000741d5ee6>] netlink_unicast+0x49a/0x650 net/netlink/af_netlink.c:1336
    [<000000009d56f9b7>] netlink_sendmsg+0x88b/0xdf0 net/netlink/af_netlink.c:1917
    [<0000000046b35c59>] sock_sendmsg_nosec net/socket.c:621 [inline]
    [<0000000046b35c59>] sock_sendmsg+0xc3/0x100 net/socket.c:631
    [<00000000d208adc9>] __sys_sendto+0x33e/0x560 net/socket.c:1786
    [<00000000b582837a>] __do_sys_sendto net/socket.c:1798 [inline]
    [<00000000b582837a>] __se_sys_sendto net/socket.c:1794 [inline]
    [<00000000b582837a>] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:1794
    [<00000000c866801d>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
    [<00000000fea382d9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
    [<00000000e01dacb3>] 0xffffffffffffffff

Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c   |  4 +++-
 net/hsr/hsr_framereg.c | 12 ++++++++++++
 net/hsr/hsr_framereg.h |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b8cd43c9ed5b..c4676bacb8db 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -486,7 +486,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 
 	res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
 	if (res)
-		return res;
+		goto err_add_port;
 
 	res = register_netdevice(hsr_dev);
 	if (res)
@@ -506,6 +506,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 fail:
 	hsr_for_each_port(hsr, port)
 		hsr_del_port(port);
+err_add_port:
+	hsr_del_node(&hsr->self_node_db);
 
 	return res;
 }
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 286ceb41ac0c..9af16cb68f76 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db,
 	return 0;
 }
 
+void hsr_del_node(struct list_head *self_node_db)
+{
+	struct hsr_node *node;
+
+	rcu_read_lock();
+	node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+	rcu_read_unlock();
+	if (node) {
+		list_del_rcu(&node->mac_list);
+		kfree(node);
+	}
+}
 
 /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
  * seq_out is used to initialize filtering of outgoing duplicate frames
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 370b45998121..531fd3dfcac1 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -16,6 +16,7 @@
 
 struct hsr_node;
 
+void hsr_del_node(struct list_head *self_node_db);
 struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 			      u16 seq_out);
 struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,

From 6466e715651f9f358e60c5ea4880e4731325827f Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Wed, 6 Mar 2019 13:01:36 -0500
Subject: [PATCH 11/72] tcp: do not report TCP_CM_INQ of 0 for closed
 connections

Returning 0 as inq to userspace indicates there is no more data to
read, and the application needs to wait for EPOLLIN. For a connection
that has received FIN from the remote peer, however, the application
must continue reading until getting EOF (return value of 0
from tcp_recvmsg) or an error, if edge-triggered epoll (EPOLLET) is
being used. Otherwise, the application will never receive a new
EPOLLIN, since there is no epoll edge after the FIN.

Return 1 when there is no data left on the queue but the
connection has received FIN, so that the applications continue
reading.

Fixes: b75eba76d3d72 (tcp: send in-queue bytes in cmsg upon read)
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dbb08140cdc9..6baa6dc1b13b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1937,6 +1937,11 @@ static int tcp_inq_hint(struct sock *sk)
 		inq = tp->rcv_nxt - tp->copied_seq;
 		release_sock(sk);
 	}
+	/* After receiving a FIN, tell the user-space to continue reading
+	 * by returning a non-zero inq.
+	 */
+	if (inq == 0 && sock_flag(sk, SOCK_DONE))
+		inq = 1;
 	return inq;
 }
 

From 78c3aff834f7939d1a2570e366e33f2a880d4d9e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 4 Mar 2019 21:34:12 +0100
Subject: [PATCH 12/72] bpf: fix sysctl.c warning

When CONFIG_BPF_SYSCALL or CONFIG_SYSCTL is disabled, we get
a warning about an unused function:

kernel/sysctl.c:3331:12: error: 'proc_dointvec_minmax_bpf_stats' defined but not used [-Werror=unused-function]
 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,

The CONFIG_BPF_SYSCALL check was already handled, but the SYSCTL check
is needed on top.

Fixes: 492ecee892c2 ("bpf: enable program stats")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christian Brauner <christian@brauner.io>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7578e21a711b..2322b12cb4cf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3274,7 +3274,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
 
 #endif /* CONFIG_PROC_SYSCTL */
 
-#ifdef CONFIG_BPF_SYSCALL
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
 					  void __user *buffer, size_t *lenp,
 					  loff_t *ppos)

From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 6 Mar 2019 14:35:15 -0500
Subject: [PATCH 13/72] bpf: only test gso type on gso packets

BPF can adjust gso only for tcp bytestreams. Fail on other gso types.

But only on gso packets. It does not touch this field if !gso_size.

Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skbuff.h | 4 ++--
 net/core/filter.c      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 27beb549ffbe..f32f32407dc4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4232,10 +4232,10 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
 }
 
+/* Note: Should be called only if skb_is_gso(skb) is true */
 static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
 {
-	return skb_is_gso(skb) &&
-	       skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
+	return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
 }
 
 static inline void skb_gso_reset(struct sk_buff *skb)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5ceba98069d4..f274620945ff 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);

From 5b4f21b2a5c4f41de3e49eef66d2fa566f23971a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 5 Mar 2019 17:31:13 +0000
Subject: [PATCH 14/72] bpf: hbm: fix spelling mistake "deault" -> "default"

There are a couple of typos, fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/hbm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 8408ccb7409f..a79828ab273f 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -18,7 +18,7 @@
  *		Default is /hbm1
  *    -r <rate>	Rate limit in Mbps
  *    -s	Get HBM stats (marked, dropped, etc.)
- *    -t <time>	Exit after specified seconds (deault is 0)
+ *    -t <time>	Exit after specified seconds (default is 0)
  *    -w	Work conserving flag. cgroup can increase its bandwidth
  *		beyond the rate limit specified while there is available
  *		bandwidth. Current implementation assumes there is only
@@ -376,7 +376,7 @@ static void Usage(void)
 	       "               Default is /hbm1\n"
 	       "    -r <rate>  Rate in Mbps\n"
 	       "    -s         Update HBM stats\n"
-	       "    -t <time>  Exit after specified seconds (deault is 0)\n"
+	       "    -t <time>  Exit after specified seconds (default is 0)\n"
 	       "    -w	       Work conserving flag. cgroup can increase\n"
 	       "               bandwidth beyond the rate limit specified\n"
 	       "               while there is available bandwidth. Current\n"

From 915905f8b1d452e70ee6d8637c3f0fb55a39691d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Mar 2019 09:31:26 -0800
Subject: [PATCH 15/72] xsk: fix potential crash in xsk_diag_put_umem()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes two typos in xsk_diag_put_umem()

syzbot reported the following crash :

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 7641 Comm: syz-executor946 Not tainted 5.0.0-rc7+ #95
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline]
RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline]
RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143
Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85
RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203
RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c
RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014
RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174
R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000
R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40
FS:  00000000011ea880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000080 CR3: 000000008fa13000 CR4: 00000000001406e0
Call Trace:
 netlink_dump+0x55d/0xfb0 net/netlink/af_netlink.c:2252
 __netlink_dump_start+0x5b4/0x7e0 net/netlink/af_netlink.c:2360
 netlink_dump_start include/linux/netlink.h:226 [inline]
 xsk_diag_handler_dump+0x1b2/0x250 net/xdp/xsk_diag.c:170
 __sock_diag_cmd net/core/sock_diag.c:232 [inline]
 sock_diag_rcv_msg+0x322/0x410 net/core/sock_diag.c:263
 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485
 sock_diag_rcv+0x2b/0x40 net/core/sock_diag.c:274
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:632
 sock_write_iter+0x27c/0x3e0 net/socket.c:923
 call_write_iter include/linux/fs.h:1863 [inline]
 do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680
 do_iter_write fs/read_write.c:956 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:937
 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001
 do_writev+0xf6/0x290 fs/read_write.c:1036
 __do_sys_writev fs/read_write.c:1109 [inline]
 __se_sys_writev fs/read_write.c:1106 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x440139
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffcc966cc18 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440139
RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 0000000000000004 R11: 0000000000000246 R12: 00000000004019c0
R13: 0000000000401a50 R14: 0000000000000000 R15: 0000000000000000
Modules linked in:
---[ end trace 460a3c24d0a656c9 ]---
RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline]
RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline]
RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143
Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85
RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203
RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c
RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014
RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174
R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000
R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40
FS:  00000000011ea880(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000001d22000 CR3: 000000008fa13000 CR4: 00000000001406f0

Fixes: a36b38aa2af6 ("xsk: add sock_diag interface for AF_XDP")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Björn Töpel <bjorn.topel@intel.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk_diag.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 661d007c3b28..d5e06c8e0cbf 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -68,9 +68,9 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
 	err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
 
 	if (!err && umem->fq)
-		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+		err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb);
 	if (!err && umem->cq) {
-		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+		err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING,
 					nlskb);
 	}
 	return err;

From ea0371f7879987cff70e21d808e3e9fea624c051 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Mon, 4 Mar 2019 16:27:08 -0800
Subject: [PATCH 16/72] net: fix GSO in bpf_lwt_push_ip_encap

GSO needs inner headers and inner protocol set properly to work.

skb->inner_mac_header: skb_reset_inner_headers() assigns the current
mac header value to inner_mac_header; but it is not set at the point,
so we need to call skb_reset_inner_mac_header, otherwise gre_gso_segment
fails: it does

    int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
    ...
    if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
    ...

skb->inner_protocol should also be correctly set.

Fixes: ca78801a81e0 ("bpf: handle GSO in bpf_lwt_push_encap")
Signed-off-by: Peter Oskolkov <posk@google.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/lwt_bpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index cf2f8897ca19..126d31ff5ee3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
 
 	/* push the encap headers and fix pointers */
 	skb_reset_inner_headers(skb);
+	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
+	skb_set_inner_protocol(skb, skb->protocol);
 	skb->encapsulation = 1;
 	skb_push(skb, len);
 	if (ingress)

From 17a90a78847324b1a34b0de833492cbd2366361d Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Mon, 4 Mar 2019 16:27:09 -0800
Subject: [PATCH 17/72] selftests/bpf: test that GSO works in lwt_ip_encap

Add a test on egress that a large TCP packet successfully goes through
the lwt+bpf encap tunnel.

Although there is no direct evidence that GSO worked, as opposed to
e.g. TCP segmentation or IP fragmentation (maybe a kernel stats counter
should be added to track the number of failed GSO attempts?), without
the previous patch in the patchset this test fails, and printk-debugging
showed that software-based GSO succeeded here (veth is not compatible with
SKB_GSO_DODGY, so GSO happens in the software stack).

Also removed an unnecessary nodad and added a missed failed flag.

Signed-off-by: Peter Oskolkov <posk@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/test_lwt_ip_encap.sh        | 54 ++++++++++++++++++-
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 612632c1425f..d4d3391cc13a 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -78,6 +78,8 @@ TEST_STATUS=0
 TESTS_SUCCEEDED=0
 TESTS_FAILED=0
 
+TMPFILE=""
+
 process_test_results()
 {
 	if [[ "${TEST_STATUS}" -eq 0 ]] ; then
@@ -147,7 +149,6 @@ setup()
 	ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
 	ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
 
-
 	ip -netns ${NS1} link set dev veth1 up
 	ip -netns ${NS2} link set dev veth2 up
 	ip -netns ${NS2} link set dev veth3 up
@@ -205,7 +206,7 @@ setup()
 	# configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
 	ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
 	ip -netns ${NS3} link set gre_dev up
-	ip -netns ${NS3} addr add ${IPv4_GRE} nodad dev gre_dev
+	ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
 	ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
 	ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
 
@@ -222,12 +223,18 @@ setup()
 	ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
 	ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
 
+	TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
+
 	sleep 1  # reduce flakiness
 	set +e
 }
 
 cleanup()
 {
+	if [ -f ${TMPFILE} ] ; then
+		rm ${TMPFILE}
+	fi
+
 	ip netns del ${NS1} 2> /dev/null
 	ip netns del ${NS2} 2> /dev/null
 	ip netns del ${NS3} 2> /dev/null
@@ -278,6 +285,46 @@ test_ping()
 	fi
 }
 
+test_gso()
+{
+	local readonly PROTO=$1
+	local readonly PKT_SZ=5000
+	local IP_DST=""
+	: > ${TMPFILE}  # trim the capture file
+
+	# check that nc is present
+	command -v nc >/dev/null 2>&1 || \
+		{ echo >&2 "nc is not available: skipping TSO tests"; return; }
+
+	# listen on IPv*_DST, capture TCP into $TMPFILE
+	if [ "${PROTO}" == "IPv4" ] ; then
+		IP_DST=${IPv4_DST}
+		ip netns exec ${NS3} bash -c \
+			"nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &"
+	elif [ "${PROTO}" == "IPv6" ] ; then
+		IP_DST=${IPv6_DST}
+		ip netns exec ${NS3} bash -c \
+			"nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &"
+		RET=$?
+	else
+		echo "    test_gso: unknown PROTO: ${PROTO}"
+		TEST_STATUS=1
+	fi
+	sleep 1  # let nc start listening
+
+	# send a packet larger than MTU
+	ip netns exec ${NS1} bash -c \
+		"dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
+	sleep 2 # let the packet get delivered
+
+	# verify we received all expected bytes
+	SZ=$(stat -c %s ${TMPFILE})
+	if [ "$SZ" != "$PKT_SZ" ] ; then
+		echo "    test_gso failed: ${PROTO}"
+		TEST_STATUS=1
+	fi
+}
+
 test_egress()
 {
 	local readonly ENCAP=$1
@@ -307,6 +354,8 @@ test_egress()
 	fi
 	test_ping IPv4 0
 	test_ping IPv6 0
+	test_gso IPv4
+	test_gso IPv6
 
 	# a negative test: remove routes to GRE devices: ping fails
 	remove_routes_to_gredev
@@ -350,6 +399,7 @@ test_ingress()
 		ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
 	else
 		echo "FAIL: unknown encap ${ENCAP}"
+		TEST_STATUS=1
 	fi
 	test_ping IPv4 0
 	test_ping IPv6 0

From e78e00bd478a43ee01db726ce488424a371e8684 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 6 Mar 2019 11:59:26 -0800
Subject: [PATCH 18/72] selftests: bpf: fix compilation with out-of-tree
 $(OUTPUT)

A bunch of related changes lumped together:
* Create prog_tests and verifier output directories; these don't exist with
  out-of-tree $(OUTPUT)
* Add missing -I (via separate TEST_{PROGS,VERIFIER}_CFLAGS) for the main tree
  ($(PWD) != $(OUTPUT) for out-of-tree)
* Add libbpf.a dependency for test_progs_32 (parallel make fails otherwise)
* Add missing "; \" after "cd" when generating test.h headers

Tested by:
$ alias m="make -s -j$(nproc)"
$ m -C tools/testing/selftests/bpf/ clean
$ m -C tools/lib/bpf/ clean
$ rm -rf xxx; mkdir xxx; m -C tools/testing/selftests/bpf/ OUTPUT=$PWD/xxx
$ m -C tools/testing/selftests/bpf/

Fixes: 3f30658830f3 ("selftests: bpf: break up test_progs - preparations")
Fixes: 2dfb40121ee8 ("selftests: bpf: prepare for break up of verifier tests")
Fixes: 3ef84346c561 ("selftests: bpf: makefile support sub-register code-gen test mode")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile | 33 +++++++++++++++++++---------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 518cd587cd63..2aed37ea61a4 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -153,6 +153,9 @@ endif
 endif
 endif
 
+TEST_PROGS_CFLAGS := -I. -I$(OUTPUT)
+TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier
+
 ifneq ($(SUBREG_CODEGEN),)
 ALU32_BUILD_DIR = $(OUTPUT)/alu32
 TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32
@@ -162,13 +165,15 @@ $(ALU32_BUILD_DIR):
 $(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read
 	cp $< $@
 
-$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \
+$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
+						$(ALU32_BUILD_DIR) \
 						$(ALU32_BUILD_DIR)/urandom_read
-	$(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \
-		trace_helpers.c prog_tests/*.c $(OUTPUT)/libbpf.a $(LDLIBS)
+	$(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
+		-o $(ALU32_BUILD_DIR)/test_progs_32 \
+		test_progs.c trace_helpers.c prog_tests/*.c \
+		$(OUTPUT)/libbpf.a $(LDLIBS)
 
 $(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)
-$(ALU32_BUILD_DIR)/test_progs_32: CFLAGS += -I$(OUTPUT)
 $(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c
 
 $(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \
@@ -202,12 +207,16 @@ endif
 
 PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h
 $(OUTPUT)/test_progs: $(PROG_TESTS_H)
-$(OUTPUT)/test_progs: CFLAGS += -I$(OUTPUT)
+$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
 $(OUTPUT)/test_progs: prog_tests/*.c
 
+PROG_TESTS_DIR = $(OUTPUT)/prog_tests
+$(PROG_TESTS_DIR):
+	mkdir -p $@
+
 PROG_TESTS_FILES := $(wildcard prog_tests/*.c)
-$(PROG_TESTS_H): $(PROG_TESTS_FILES)
-	$(shell ( cd prog_tests/
+$(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES)
+	$(shell ( cd prog_tests/; \
 		  echo '/* Generated header, do not edit */'; \
 		  echo '#ifdef DECLARE'; \
 		  ls *.c 2> /dev/null | \
@@ -221,11 +230,15 @@ $(PROG_TESTS_H): $(PROG_TESTS_FILES)
 
 VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h
 $(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H)
-$(OUTPUT)/test_verifier: CFLAGS += -I$(OUTPUT)
+$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
+
+VERIFIER_TESTS_DIR = $(OUTPUT)/verifier
+$(VERIFIER_TESTS_DIR):
+	mkdir -p $@
 
 VERIFIER_TEST_FILES := $(wildcard verifier/*.c)
-$(OUTPUT)/verifier/tests.h: $(VERIFIER_TEST_FILES)
-	$(shell ( cd verifier/
+$(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES)
+	$(shell ( cd verifier/; \
 		  echo '/* Generated header, do not edit */'; \
 		  echo '#ifdef FILL_ARRAY'; \
 		  ls *.c 2> /dev/null | \

From 8e2688876c7f7073d925e1f150e86b8ed3338f52 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 6 Mar 2019 11:59:27 -0800
Subject: [PATCH 19/72] libbpf: force fixdep compilation at the start of the
 build

libbpf targets don't explicitly depend on fixdep target, so when
we do 'make -j$(nproc)', there is a high probability, that some
objects will be built before fixdep binary is available.

Fix this by running sub-make; this makes sure that fixdep dependency
is properly accounted for.

For the same issue in perf, see commit abb26210a395 ("perf tools: Force
fixdep compilation at the start of the build").

Before:

$ rm -rf /tmp/bld; mkdir /tmp/bld; make -j$(nproc) O=/tmp/bld -C tools/lib/bpf/

Auto-detecting system features:
...                        libelf: [ on  ]
...                           bpf: [ on  ]

  HOSTCC   /tmp/bld/fixdep.o
  CC       /tmp/bld/libbpf.o
  CC       /tmp/bld/bpf.o
  CC       /tmp/bld/btf.o
  CC       /tmp/bld/nlattr.o
  CC       /tmp/bld/libbpf_errno.o
  CC       /tmp/bld/str_error.o
  CC       /tmp/bld/netlink.o
  CC       /tmp/bld/bpf_prog_linfo.o
  CC       /tmp/bld/libbpf_probes.o
  CC       /tmp/bld/xsk.o
  HOSTLD   /tmp/bld/fixdep-in.o
  LINK     /tmp/bld/fixdep
  LD       /tmp/bld/libbpf-in.o
  LINK     /tmp/bld/libbpf.a
  LINK     /tmp/bld/libbpf.so
  LINK     /tmp/bld/test_libbpf

$ head /tmp/bld/.libbpf.o.cmd
 # cannot find fixdep (/usr/local/google/home/sdf/src/linux/xxx//fixdep)
 # using basic dep data

/tmp/bld/libbpf.o: libbpf.c /usr/include/stdc-predef.h \
 /usr/include/stdlib.h /usr/include/features.h \
 /usr/include/x86_64-linux-gnu/sys/cdefs.h \
 /usr/include/x86_64-linux-gnu/bits/wordsize.h \
 /usr/include/x86_64-linux-gnu/gnu/stubs.h \
 /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
 /usr/lib/gcc/x86_64-linux-gnu/7/include/stddef.h \

After:

$ rm -rf /tmp/bld; mkdir /tmp/bld; make -j$(nproc) O=/tmp/bld -C tools/lib/bpf/

Auto-detecting system features:
...                        libelf: [ on  ]
...                           bpf: [ on  ]

  HOSTCC   /tmp/bld/fixdep.o
  HOSTLD   /tmp/bld/fixdep-in.o
  LINK     /tmp/bld/fixdep
  CC       /tmp/bld/libbpf.o
  CC       /tmp/bld/bpf.o
  CC       /tmp/bld/nlattr.o
  CC       /tmp/bld/btf.o
  CC       /tmp/bld/libbpf_errno.o
  CC       /tmp/bld/str_error.o
  CC       /tmp/bld/netlink.o
  CC       /tmp/bld/bpf_prog_linfo.o
  CC       /tmp/bld/libbpf_probes.o
  CC       /tmp/bld/xsk.o
  LD       /tmp/bld/libbpf-in.o
  LINK     /tmp/bld/libbpf.a
  LINK     /tmp/bld/libbpf.so
  LINK     /tmp/bld/test_libbpf

$ head /tmp/bld/.libbpf.o.cmd
cmd_/tmp/bld/libbpf.o := gcc -Wp,-MD,/tmp/bld/.libbpf.o.d -Wp,-MT,/tmp/bld/libbpf.o -g -Wall -DHAVE_LIBELF_MMAP_SUPPORT -DCOMPAT_NEED_REALLOCARRAY -Wbad-function-cast -Wdeclaration-after-statement -Wformat-security -Wformat-y2k -Winit-self -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-system-headers -Wold-style-definition -Wpacked -Wredundant-decls -Wshadow -Wstrict-prototypes -Wswitch-default -Wswitch-enum -Wundef -Wwrite-strings -Wformat -Wstrict-aliasing=3 -Werror -Wall -fPIC -I. -I/usr/local/google/home/sdf/src/linux/tools/include -I/usr/local/google/home/sdf/src/linux/tools/arch/x86/include/uapi -I/usr/local/google/home/sdf/src/linux/tools/include/uapi -fvisibility=hidden -D"BUILD_STR(s)=$(pound)s" -c -o /tmp/bld/libbpf.o libbpf.c

source_/tmp/bld/libbpf.o := libbpf.c

deps_/tmp/bld/libbpf.o := \
  /usr/include/stdc-predef.h \
  /usr/include/stdlib.h \
  /usr/include/features.h \
  /usr/include/x86_64-linux-gnu/sys/cdefs.h \
  /usr/include/x86_64-linux-gnu/bits/wordsize.h \

Fixes: 7c422f557266 ("tools build: Build fixdep helper from perf and basic libs")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index a05c43468bd0..61aaacf0cfa1 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -147,7 +147,8 @@ endif
 
 TARGETS = $(CMD_TARGETS)
 
-all: fixdep all_cmd
+all: fixdep
+	$(Q)$(MAKE) all_cmd
 
 all_cmd: $(CMD_TARGETS) check
 

From 69b09175d68241cb96ff9433d8e7a17382d720d6 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 6 Mar 2019 15:25:26 -0800
Subject: [PATCH 20/72] selftests: bpf: test_progs: initialize duration in
 singal_pending test

CHECK macro implicitly uses duration. We call CHECK() a couple of times
before duration is initialized from bpf_prog_test_run().
Explicitly set duration to 0 to avoid compiler warnings.

Fixes: 740f8a657221 ("selftests/bpf: make sure signal interrupts BPF_PROG_TEST_RUN")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/prog_tests/signal_pending.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index f2a37bbf91ab..996e808f43a2 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -12,7 +12,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
 	struct itimerval timeo = {
 		.it_value.tv_usec = 100000, /* 100ms */
 	};
-	__u32 duration, retval;
+	__u32 duration = 0, retval;
 	int prog_fd;
 	int err;
 	int i;

From e8e3437762ad938880dd48a3c52d702e7cf3c124 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 7 Mar 2019 11:35:43 +0100
Subject: [PATCH 21/72] bpf: Stop the psock parser before canceling its work

We might have never enabled (started) the psock's parser, in which case it
will not get stopped when destroying the psock. This leads to a warning
when trying to cancel parser's work from psock's deferred destructor:

[  405.325769] WARNING: CPU: 1 PID: 3216 at net/strparser/strparser.c:526 strp_done+0x3c/0x40
[  405.326712] Modules linked in: [last unloaded: test_bpf]
[  405.327359] CPU: 1 PID: 3216 Comm: kworker/1:164 Tainted: G        W         5.0.0 #42
[  405.328294] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014
[  405.329712] Workqueue: events sk_psock_destroy_deferred
[  405.330254] RIP: 0010:strp_done+0x3c/0x40
[  405.330706] Code: 28 e8 b8 d5 6b ff 48 8d bb 80 00 00 00 e8 9c d5 6b ff 48 8b 7b 18 48 85 ff 74 0d e8 1e a5 e8 ff 48 c7 43 18 00 00 00 00 5b c3 <0f> 0b eb cf 66 66 66 66 90 55 89 f5 53 48 89 fb 48 83 c7 28 e8 0b
[  405.332862] RSP: 0018:ffffc900026bbe50 EFLAGS: 00010246
[  405.333482] RAX: ffffffff819323e0 RBX: ffff88812cb83640 RCX: ffff88812cb829e8
[  405.334228] RDX: 0000000000000001 RSI: ffff88812cb837e8 RDI: ffff88812cb83640
[  405.335366] RBP: ffff88813fd22680 R08: 0000000000000000 R09: 000073746e657665
[  405.336472] R10: 8080808080808080 R11: 0000000000000001 R12: ffff88812cb83600
[  405.337760] R13: 0000000000000000 R14: ffff88811f401780 R15: ffff88812cb837e8
[  405.338777] FS:  0000000000000000(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000
[  405.339903] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  405.340821] CR2: 00007fb11489a6b8 CR3: 000000012d4d6000 CR4: 00000000000406e0
[  405.341981] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  405.343131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  405.344415] Call Trace:
[  405.344821]  sk_psock_destroy_deferred+0x23/0x1b0
[  405.345585]  process_one_work+0x1ae/0x3e0
[  405.346110]  worker_thread+0x3c/0x3b0
[  405.346576]  ? pwq_unbound_release_workfn+0xd0/0xd0
[  405.347187]  kthread+0x11d/0x140
[  405.347601]  ? __kthread_parkme+0x80/0x80
[  405.348108]  ret_from_fork+0x35/0x40
[  405.348566] ---[ end trace a4a3af4026a327d4 ]---

Stop psock's parser just before canceling its work.

Fixes: 1d79895aef18 ("sk_msg: Always cancel strp work before freeing the psock")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/skmsg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ae6f06e45737..cc94d921476c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
 	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
 
 	/* No sk_callback_lock since already detached. */
+	strp_stop(&psock->parser.strp);
 	strp_done(&psock->parser.strp);
 
 	cancel_work_sync(&psock->work);

From 20182390c4134478d795a096ddb8dddcc648e28a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 4 Mar 2019 21:08:53 +0100
Subject: [PATCH 22/72] bpf: fix replace_map_fd_with_map_ptr's ldimm64 second
 imm field

Non-zero imm value in the second part of the ldimm64 instruction for
BPF_PSEUDO_MAP_FD is invalid, and thus must be rejected. The map fd
only ever sits in the first instructions' imm field. None of the BPF
loaders known to us are using it, so risk of regression is minimal.
For clarity and consistency, the few insn->{src_reg,imm} occurrences
are rewritten into insn[0].{src_reg,imm}. Add a test case to the BPF
selftest suite as well.

Fixes: 0246e64d9a5f ("bpf: handle pseudo BPF_LD_IMM64 insn")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                           | 10 +++++-----
 tools/testing/selftests/bpf/verifier/ld_imm64.c | 15 ++++++++++++++-
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a7b96bf0e654..ce166a002d16 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6678,17 +6678,17 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 				/* valid generic load 64-bit imm */
 				goto next_insn;
 
-			if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
-				verbose(env,
-					"unrecognized bpf_ld_imm64 insn\n");
+			if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
+			    insn[1].imm != 0) {
+				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
 				return -EINVAL;
 			}
 
-			f = fdget(insn->imm);
+			f = fdget(insn[0].imm);
 			map = __bpf_map_get(f);
 			if (IS_ERR(map)) {
 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
-					insn->imm);
+					insn[0].imm);
 				return PTR_ERR(map);
 			}
 
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index 28b8c805a293..3856dba733e9 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -122,7 +122,7 @@
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
-	BPF_RAW_INSN(0, 0, 0, 0, 1),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "not pointing to valid bpf_map",
@@ -139,3 +139,16 @@
 	.errstr = "invalid bpf_ld_imm64 insn",
 	.result = REJECT,
 },
+{
+	"test14 ld_imm64: reject 2nd imm != 0",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_1,
+		     BPF_PSEUDO_MAP_FD, 0, 0),
+	BPF_RAW_INSN(0, 0, 0, 0, 0xfefefe),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 1 },
+	.errstr = "unrecognized bpf_ld_imm64 insn",
+	.result = REJECT,
+},

From f096ca63ca2a47a14892d1cf06cee99e78029541 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 7 Mar 2019 10:31:20 +0100
Subject: [PATCH 23/72] davinci_emac: always build in CONFIG_OF code

clang warns about what seems to be an unintended use of an obscure C
language feature where a forward declaration of an array remains usable
when the final definition is never seen:

drivers/net/ethernet/ti/davinci_emac.c:1694:34: error: tentative array definition assumed to have one element [-Werror]
static const struct of_device_id davinci_emac_of_match[];

There is no harm in always enabling the device tree matching code here,
and it makes the code behave in a more conventional way aside from
avoiding the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/davinci_emac.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 840820402cd0..57450b174fc4 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -2029,7 +2029,6 @@ static const struct dev_pm_ops davinci_emac_pm_ops = {
 	.resume		= davinci_emac_resume,
 };
 
-#if IS_ENABLED(CONFIG_OF)
 static const struct emac_platform_data am3517_emac_data = {
 	.version		= EMAC_VERSION_2,
 	.hw_ram_addr		= 0x01e20000,
@@ -2046,14 +2045,13 @@ static const struct of_device_id davinci_emac_of_match[] = {
 	{},
 };
 MODULE_DEVICE_TABLE(of, davinci_emac_of_match);
-#endif
 
 /* davinci_emac_driver: EMAC platform driver structure */
 static struct platform_driver davinci_emac_driver = {
 	.driver = {
 		.name	 = "davinci_emac",
 		.pm	 = &davinci_emac_pm_ops,
-		.of_match_table = of_match_ptr(davinci_emac_of_match),
+		.of_match_table = davinci_emac_of_match,
 	},
 	.probe = davinci_emac_probe,
 	.remove = davinci_emac_remove,

From 8a72b81e6df516847848556d0967aefa5457f11f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 7 Mar 2019 10:32:07 +0100
Subject: [PATCH 24/72] isdn: isdnloop: fix pointer dereference bug

clang has spotted an ancient code bug and warns about it with:

drivers/isdn/isdnloop/isdnloop.c:573:12: error: address of array 'card->rcard' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion]

This is an array of pointers, so we should check if a specific
pointer exists in the array before using it, not whether the
array itself exists.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/isdnloop/isdnloop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index f4253d468ae1..755c6bbc9553 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -570,7 +570,7 @@ isdnloop_atimeout(isdnloop_card *card, int ch)
 	char buf[60];
 
 	spin_lock_irqsave(&card->isdnloop_lock, flags);
-	if (card->rcard) {
+	if (card->rcard[ch]) {
 		isdnloop_fake(card->rcard[ch], "DDIS_I", card->rch[ch] + 1);
 		card->rcard[ch]->rcard[card->rch[ch]] = NULL;
 		card->rcard[ch] = NULL;

From ae9819e339b451da7a86ab6fe38ecfcb6814e78a Mon Sep 17 00:00:00 2001
From: Masaru Nagai <masaru.nagai.vx@renesas.com>
Date: Thu, 7 Mar 2019 11:24:47 +0100
Subject: [PATCH 25/72] ravb: Decrease TxFIFO depth of Q3 and Q2 to one

Hardware has the CBS (Credit Based Shaper) which affects only Q3
and Q2. When updating the CBS settings, even if the driver does so
after waiting for Tx DMA finished, there is a possibility that frame
data still remains in TxFIFO.

To avoid this, decrease TxFIFO depth of Q3 and Q2 to one.

This patch has been exercised this using netperf TCP_MAERTS, TCP_STREAM
and UDP_STREAM tests run on an Ebisu board. No performance change was
detected, outside of noise in the tests, both in terms of throughput and
CPU utilisation.

Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
Signed-off-by: Masaru Nagai <masaru.nagai.vx@renesas.com>
Signed-off-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
[simon: updated changelog]
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index d28c8f9ca55b..8154b38c08f7 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -458,7 +458,7 @@ static int ravb_dmac_init(struct net_device *ndev)
 		   RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR);
 
 	/* Set FIFO size */
-	ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00222200, TGC);
+	ravb_write(ndev, TGC_TQP_AVBMODE1 | 0x00112200, TGC);
 
 	/* Timestamp enable */
 	ravb_write(ndev, TCCR_TFEN, TCCR);

From a2ae6da025ed73e4312d983b5e57300bc77090f3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 7 Mar 2019 11:31:55 +0100
Subject: [PATCH 26/72] peak_usb: fix clang build warning

Clang points out undefined behavior when building the pcan_usb_pro driver:

drivers/net/can/usb/peak_usb/pcan_usb_pro.c:136:15: error: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Werror,-Wvarargs]

Changing the function prototype to avoid argument promotion in the
varargs call avoids the warning, and should make this well-defined.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index d516def846ab..b388406ac0f5 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -127,7 +127,7 @@ static u8 *pcan_msg_init_empty(struct pcan_usb_pro_msg *pm,
 /*
  * add one record to a message being built
  */
-static int pcan_msg_add_rec(struct pcan_usb_pro_msg *pm, u8 id, ...)
+static int pcan_msg_add_rec(struct pcan_usb_pro_msg *pm, int id, ...)
 {
 	int len, i;
 	u8 *pc;

From 43d281662fdb46750d49417559b71069f435298d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 7 Mar 2019 16:52:24 +0100
Subject: [PATCH 27/72] enic: fix build warning without CONFIG_CPUMASK_OFFSTACK

The enic driver relies on the CONFIG_CPUMASK_OFFSTACK feature to
dynamically allocate a struct member, but this is normally intended for
local variables.

Building with clang, I get a warning for a few locations that check the
address of the cpumask_var_t:

drivers/net/ethernet/cisco/enic/enic_main.c:122:22: error: address of array 'enic->msix[i].affinity_mask' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion]

As far as I can tell, the code is still correct, as the truth value of
the pointer is what we need in this configuration. To get rid of
the warning, use cpumask_available() instead of checking the
pointer directly.

Fixes: 322cf7e3a4e8 ("enic: assign affinity hint to interrupts")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 9a7f70db20c7..733d9172425b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -119,7 +119,7 @@ static void enic_init_affinity_hint(struct enic *enic)
 
 	for (i = 0; i < enic->intr_count; i++) {
 		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
-		    (enic->msix[i].affinity_mask &&
+		    (cpumask_available(enic->msix[i].affinity_mask) &&
 		     !cpumask_empty(enic->msix[i].affinity_mask)))
 			continue;
 		if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
@@ -148,7 +148,7 @@ static void enic_set_affinity_hint(struct enic *enic)
 	for (i = 0; i < enic->intr_count; i++) {
 		if (enic_is_err_intr(enic, i)		||
 		    enic_is_notify_intr(enic, i)	||
-		    !enic->msix[i].affinity_mask	||
+		    !cpumask_available(enic->msix[i].affinity_mask) ||
 		    cpumask_empty(enic->msix[i].affinity_mask))
 			continue;
 		err = irq_set_affinity_hint(enic->msix_entry[i].vector,
@@ -161,7 +161,7 @@ static void enic_set_affinity_hint(struct enic *enic)
 	for (i = 0; i < enic->wq_count; i++) {
 		int wq_intr = enic_msix_wq_intr(enic, i);
 
-		if (enic->msix[wq_intr].affinity_mask &&
+		if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
 		    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
 			netif_set_xps_queue(enic->netdev,
 					    enic->msix[wq_intr].affinity_mask,

From b89869da2db916914f6a2c7ab14183c8aef23d97 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Thu, 7 Mar 2019 07:56:35 -0800
Subject: [PATCH 28/72] qede: Fix internal loopback failure with jumbo mtu
 configuration

Driver uses port-mtu as packet-size for the loopback traffic. This patch
limits the max packet size to 1.5K to avoid data being split over multiple
buffer descriptors (BDs) in cases where MTU > PAGE_SIZE.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index c6238083e898..b4c8949933f1 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -1663,8 +1663,11 @@ static int qede_selftest_run_loopback(struct qede_dev *edev, u32 loopback_mode)
 	/* Wait for loopback configuration to apply */
 	msleep_interruptible(500);
 
-	/* prepare the loopback packet */
-	pkt_size = edev->ndev->mtu + ETH_HLEN;
+	/* Setting max packet size to 1.5K to avoid data being split over
+	 * multiple BDs in cases where MTU > PAGE_SIZE.
+	 */
+	pkt_size = (((edev->ndev->mtu < ETH_DATA_LEN) ?
+		     edev->ndev->mtu : ETH_DATA_LEN) + ETH_HLEN);
 
 	skb = netdev_alloc_skb(edev->ndev, pkt_size);
 	if (!skb) {

From 3499e87ea0413ee5b2cc028f4c8ed4d424bc7f98 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 7 Mar 2019 16:58:35 +0100
Subject: [PATCH 29/72] ethtool: reduce stack usage with clang

clang inlines the dev_ethtool() more aggressively than gcc does, leading
to a larger amount of used stack space:

net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=]

Marking the sub-functions that require the most stack space as
noinline_for_stack gives us reasonable behavior on all compilers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d4918ffddda8..b1eb32419732 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-static int ethtool_get_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_get_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int ret;
@@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev,
 	return 0;
 }
 
-static int ethtool_set_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_set_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int i, ret = 0;
@@ -2405,7 +2407,7 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev,
 	return ret;
 }
 
-static int ethtool_set_per_queue(struct net_device *dev,
+static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,
 				 void __user *useraddr, u32 sub_cmd)
 {
 	struct ethtool_per_queue_op per_queue_opt;

From f9d19a7494e5341a7f256823e32788ae560ca22f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 09:57:42 -0700
Subject: [PATCH 30/72] net: atm: Use IS_ENABLED in atm_dev_ioctl

When building with -Wsometimes-uninitialized, Clang warns:

net/atm/resources.c:256:6: warning: variable 'number' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
net/atm/resources.c:212:7: warning: variable 'iobuf_len' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]

Clang won't realize that compat is 0 when CONFIG_COMPAT is not set until
the constant folding stage, which happens after this semantic analysis.
Use IS_ENABLED instead so that the zero is present at the semantic
analysis stage, which eliminates this warning.

Link: https://github.com/ClangBuiltLinux/linux/issues/386
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/resources.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/atm/resources.c b/net/atm/resources.c
index bada395ecdb1..3e9f6391319e 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -203,13 +203,9 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 	int __user *sioc_len;
 	int __user *iobuf_len;
 
-#ifndef CONFIG_COMPAT
-	compat = 0; /* Just so the compiler _knows_ */
-#endif
-
 	switch (cmd) {
 	case ATM_GETNAMES:
-		if (compat) {
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 			struct compat_atm_iobuf __user *ciobuf = arg;
 			compat_uptr_t cbuf;
@@ -253,7 +249,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 		break;
 	}
 
-	if (compat) {
+	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 		struct compat_atmif_sioc __user *csioc = arg;
 		compat_uptr_t carg;

From df103170854e87124ee7bdd2bca64b178e653f97 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 11:00:28 -0700
Subject: [PATCH 31/72] net: stmmac: Avoid sometimes uninitialized Clang
 warnings

When building with -Wsometimes-uninitialized, Clang warns:

drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:495:3: warning: variable 'ns' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:495:3: warning: variable 'ns' is used uninitialized whenever '&&' condition is false [-Wsometimes-uninitialized]
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:532:3: warning: variable 'ns' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:532:3: warning: variable 'ns' is used uninitialized whenever '&&' condition is false [-Wsometimes-uninitialized]
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:741:3: warning: variable 'sec_inc' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:741:3: warning: variable 'sec_inc' is used uninitialized whenever '&&' condition is false [-Wsometimes-uninitialized]

Clang is concerned with the use of stmmac_do_void_callback (which
stmmac_get_timestamp and stmmac_config_sub_second_increment wrap),
as it may fail to initialize these values if the if condition was ever
false (meaning the callbacks don't exist). It's not wrong because the
callbacks (get_timestamp and config_sub_second_increment respectively)
are the ones that initialize the variables. While it's unlikely that the
callbacks are ever going to disappear and make that condition false, we
can easily avoid this warning by zero initialize the variables.

Link: https://github.com/ClangBuiltLinux/linux/issues/384
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e2a13ec2e30b..97c5e1aad88f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -480,7 +480,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 				   struct dma_desc *p, struct sk_buff *skb)
 {
 	struct skb_shared_hwtstamps shhwtstamp;
-	u64 ns;
+	u64 ns = 0;
 
 	if (!priv->hwts_tx_en)
 		return;
@@ -519,7 +519,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 {
 	struct skb_shared_hwtstamps *shhwtstamp = NULL;
 	struct dma_desc *desc = p;
-	u64 ns;
+	u64 ns = 0;
 
 	if (!priv->hwts_rx_en)
 		return;
@@ -564,8 +564,8 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 	u32 snap_type_sel = 0;
 	u32 ts_master_en = 0;
 	u32 ts_event_en = 0;
+	u32 sec_inc = 0;
 	u32 value = 0;
-	u32 sec_inc;
 	bool xmac;
 
 	xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;

From 0805a4b894a8daaf76ad99934563d8ecfc6e7aed Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 11:11:26 -0700
Subject: [PATCH 32/72] net: atm: Add another IS_ENABLED(CONFIG_COMPAT) in
 atm_dev_ioctl

I removed compat's universal assignment to 0, which allows this if
statement to fall through when compat is passed with a value other
than 0.

Fixes: f9d19a7494e5 ("net: atm: Use IS_ENABLED in atm_dev_ioctl")
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/resources.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/atm/resources.c b/net/atm/resources.c
index 3e9f6391319e..889349c6d90d 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -413,7 +413,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 		}
 		/* fall through */
 	default:
-		if (compat) {
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
 			if (!dev->ops->compat_ioctl) {
 				error = -EINVAL;

From eaab2d2d0fe4393b040dbf3922e18cd2ab7d6b85 Mon Sep 17 00:00:00 2001
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Date: Thu, 7 Mar 2019 10:52:13 -0800
Subject: [PATCH 33/72] nfp: fix simple vNIC mailbox length

The simple vNIC mailbox length should be 12 decimal and not 0x12.
Using a decimal also makes it clear this is a length value and not
another field within the simple mailbox defines.

Found by code inspection, there are no known firmware configurations
where this would cause issues.

Fixes: 527d7d1b9949 ("nfp: read mailbox address from TLV caps")
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 166d7f71442e..372adea10e14 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -392,7 +392,7 @@
 #define NFP_NET_CFG_MBOX_SIMPLE_CMD	0x0
 #define NFP_NET_CFG_MBOX_SIMPLE_RET	0x4
 #define NFP_NET_CFG_MBOX_SIMPLE_VAL	0x8
-#define NFP_NET_CFG_MBOX_SIMPLE_LEN	0x12
+#define NFP_NET_CFG_MBOX_SIMPLE_LEN	12
 
 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1
 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2

From 1e027960edfaa6a43f9ca31081729b716598112b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 Mar 2019 09:36:33 -0800
Subject: [PATCH 34/72] net/hsr: fix possible crash in add_timer()

syzbot found another add_timer() issue, this time in net/hsr [1]

Let's use mod_timer() which is safe.

[1]
kernel BUG at kernel/time/timer.c:1136!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 15909 Comm: syz-executor.3 Not tainted 5.0.0+ #97
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
kobject: 'loop2' (00000000f5629718): kobject_uevent_env
RIP: 0010:add_timer kernel/time/timer.c:1136 [inline]
RIP: 0010:add_timer+0x654/0xbe0 kernel/time/timer.c:1134
Code: 0f 94 c5 31 ff 44 89 ee e8 09 61 0f 00 45 84 ed 0f 84 77 fd ff ff e8 bb 5f 0f 00 e8 07 10 a0 ff e9 68 fd ff ff e8 ac 5f 0f 00 <0f> 0b e8 a5 5f 0f 00 0f 0b e8 9e 5f 0f 00 4c 89 b5 58 ff ff ff e9
RSP: 0018:ffff8880656eeca0 EFLAGS: 00010246
kobject: 'loop2' (00000000f5629718): fill_kobj_path: path = '/devices/virtual/block/loop2'
RAX: 0000000000040000 RBX: 1ffff1100caddd9a RCX: ffffc9000c436000
RDX: 0000000000040000 RSI: ffffffff816056c4 RDI: ffff88806a2f6cc8
RBP: ffff8880656eed58 R08: ffff888067f4a300 R09: ffff888067f4abc8
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88806a2f6cc0
R13: dffffc0000000000 R14: 0000000000000001 R15: ffff8880656eed30
FS:  00007fc2019bf700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000738000 CR3: 0000000067e8e000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 hsr_check_announce net/hsr/hsr_device.c:99 [inline]
 hsr_check_carrier_and_operstate+0x567/0x6f0 net/hsr/hsr_device.c:120
 hsr_netdev_notify+0x297/0xa00 net/hsr/hsr_main.c:51
 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
 call_netdevice_notifiers net/core/dev.c:1765 [inline]
 dev_open net/core/dev.c:1436 [inline]
 dev_open+0x143/0x160 net/core/dev.c:1424
 team_port_add drivers/net/team/team.c:1203 [inline]
 team_add_slave+0xa07/0x15d0 drivers/net/team/team.c:1933
 do_set_master net/core/rtnetlink.c:2358 [inline]
 do_set_master+0x1d4/0x230 net/core/rtnetlink.c:2332
 do_setlink+0x966/0x3510 net/core/rtnetlink.c:2493
 rtnl_setlink+0x271/0x3b0 net/core/rtnetlink.c:2747
 rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192
 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485
 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xdd/0x130 net/socket.c:632
 sock_write_iter+0x27c/0x3e0 net/socket.c:923
 call_write_iter include/linux/fs.h:1869 [inline]
 do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680
 do_iter_write fs/read_write.c:956 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:937
 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001
 do_writev+0xf6/0x290 fs/read_write.c:1036
 __do_sys_writev fs/read_write.c:1109 [inline]
 __se_sys_writev fs/read_write.c:1106 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457f29
Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fc2019bec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457f29
RDX: 0000000000000001 RSI: 00000000200000c0 RDI: 0000000000000003
RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc2019bf6d4
R13: 00000000004c4a60 R14: 00000000004dd218 R15: 00000000ffffffff

Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Arvid Brodin <arvid.brodin@alten.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c4676bacb8db..a97bf326b231 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev,
 			&& (old_operstate != IF_OPER_UP)) {
 		/* Went up */
 		hsr->announce_count = 0;
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
-		add_timer(&hsr->announce_timer);
+		mod_timer(&hsr->announce_timer,
+			  jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
 	}
 
 	if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
@@ -332,6 +331,7 @@ static void hsr_announce(struct timer_list *t)
 {
 	struct hsr_priv *hsr;
 	struct hsr_port *master;
+	unsigned long interval;
 
 	hsr = from_timer(hsr, t, announce_timer);
 
@@ -343,18 +343,16 @@ static void hsr_announce(struct timer_list *t)
 				hsr->protVersion);
 		hsr->announce_count++;
 
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+		interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
 	} else {
 		send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
 				hsr->protVersion);
 
-		hsr->announce_timer.expires = jiffies +
-				msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+		interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
 	}
 
 	if (is_admin_up(master->dev))
-		add_timer(&hsr->announce_timer);
+		mod_timer(&hsr->announce_timer, jiffies + interval);
 
 	rcu_read_unlock();
 }

From ee60ad219f5c7c4fb2f047f88037770063ef785f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 8 Mar 2019 14:50:54 +0800
Subject: [PATCH 35/72] route: set the deleted fnhe fnhe_daddr to 0 in
 ip_del_fnhe to fix a race

The race occurs in __mkroute_output() when 2 threads lookup a dst:

  CPU A                 CPU B
  find_exception()
                        find_exception() [fnhe expires]
                        ip_del_fnhe() [fnhe is deleted]
  rt_bind_exception()

In rt_bind_exception() it will bind a deleted fnhe with the new dst, and
this dst will get no chance to be freed. It causes a dev defcnt leak and
consecutive dmesg warnings:

  unregister_netdevice: waiting for ethX to become free. Usage count = 1

Especially thanks Jon to identify the issue.

This patch fixes it by setting fnhe_daddr to 0 in ip_del_fnhe() to stop
binding the deleted fnhe with a new dst when checking fnhe's fnhe_daddr
and daddr in rt_bind_exception().

It works as both ip_del_fnhe() and rt_bind_exception() are protected by
fnhe_lock and the fhne is freed by kfree_rcu().

Fixes: deed49df7390 ("route: check and remove route cache when we get route")
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8ca3642f0d9b..a5da63e5faa2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1303,6 +1303,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
 		if (fnhe->fnhe_daddr == daddr) {
 			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
 				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			/* set fnhe_daddr to 0 to ensure it won't bind with
+			 * new dsts in rt_bind_exception().
+			 */
+			fnhe->fnhe_daddr = 0;
 			fnhe_flush_routes(fnhe);
 			kfree_rcu(fnhe, rcu);
 			break;

From 2e990dfd13974d9eae493006f42ffb48707970ef Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 8 Mar 2019 15:49:16 +0800
Subject: [PATCH 36/72] sctp: remove sched init from sctp_stream_init

syzbot reported a NULL-ptr deref caused by that sched->init() in
sctp_stream_init() set stream->rr_next = NULL.

  kasan: GPF could be caused by NULL-ptr deref or user memory access
  RIP: 0010:sctp_sched_rr_dequeue+0xd3/0x170 net/sctp/stream_sched_rr.c:141
  Call Trace:
    sctp_outq_dequeue_data net/sctp/outqueue.c:90 [inline]
    sctp_outq_flush_data net/sctp/outqueue.c:1079 [inline]
    sctp_outq_flush+0xba2/0x2790 net/sctp/outqueue.c:1205

All sched info is saved in sout->ext now, in sctp_stream_init()
sctp_stream_alloc_out() will not change it, there's no need to
call sched->init() again, since sctp_outq_init() has already
done it.

Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
Reported-by: syzbot+4c9934f20522c0efd657@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/stream.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 2936ed17bf9e..3b47457862cc 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -230,8 +230,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	for (i = 0; i < stream->outcnt; i++)
 		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
-	sched->init(stream);
-
 in:
 	sctp_stream_interleave_init(stream);
 	if (!incnt)

From 930c9f9125c85b5134b3e711bc252ecc094708e3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Mar 2019 12:48:39 +0000
Subject: [PATCH 37/72] rxrpc: Fix client call connect/disconnect race

rxrpc_disconnect_client_call() reads the call's connection ID protocol
value (call->cid) as part of that function's variable declarations.  This
is bad because it's not inside the locked section and so may race with
someone granting use of the channel to the call.

This manifests as an assertion failure (see below) where the call in the
presumed channel (0 because call->cid wasn't set when we read it) doesn't
match the call attached to the channel we were actually granted (if 1, 2 or
3).

Fix this by moving the read and dependent calculations inside of the
channel_lock section.  Also, only set the channel number and pointer
variables if cid is not zero (ie. unset).

This problem can be induced by injecting an occasional error in
rxrpc_wait_for_channel() before the call to schedule().

Make two further changes also:

 (1) Add a trace for wait failure in rxrpc_connect_call().

 (2) Drop channel_lock before BUG'ing in the case of the assertion failure.

The failure causes a trace akin to the following:

rxrpc: Assertion failed - 18446612685268945920(0xffff8880beab8c00) == 18446612685268621312(0xffff8880bea69800) is false
------------[ cut here ]------------
kernel BUG at net/rxrpc/conn_client.c:824!
...
RIP: 0010:rxrpc_disconnect_client_call+0x2bf/0x99d
...
Call Trace:
 rxrpc_connect_call+0x902/0x9b3
 ? wake_up_q+0x54/0x54
 rxrpc_new_client_call+0x3a0/0x751
 ? rxrpc_kernel_begin_call+0x141/0x1bc
 ? afs_alloc_call+0x1b5/0x1b5
 rxrpc_kernel_begin_call+0x141/0x1bc
 afs_make_call+0x20c/0x525
 ? afs_alloc_call+0x1b5/0x1b5
 ? __lock_is_held+0x40/0x71
 ? lockdep_init_map+0xaf/0x193
 ? lockdep_init_map+0xaf/0x193
 ? __lock_is_held+0x40/0x71
 ? yfs_fs_fetch_data+0x33b/0x34a
 yfs_fs_fetch_data+0x33b/0x34a
 afs_fetch_data+0xdc/0x3b7
 afs_read_dir+0x52d/0x97f
 afs_dir_iterate+0xa0/0x661
 ? iterate_dir+0x63/0x141
 iterate_dir+0xa2/0x141
 ksys_getdents64+0x9f/0x11b
 ? filldir+0x111/0x111
 ? do_syscall_64+0x3e/0x1a0
 __x64_sys_getdents64+0x16/0x19
 do_syscall_64+0x7d/0x1a0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/rxrpc.h |  2 ++
 net/rxrpc/conn_client.c      | 22 ++++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 5b50fe4906d2..7b60fd186cfe 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -76,6 +76,7 @@ enum rxrpc_client_trace {
 	rxrpc_client_chan_disconnect,
 	rxrpc_client_chan_pass,
 	rxrpc_client_chan_unstarted,
+	rxrpc_client_chan_wait_failed,
 	rxrpc_client_cleanup,
 	rxrpc_client_count,
 	rxrpc_client_discard,
@@ -276,6 +277,7 @@ enum rxrpc_tx_point {
 	EM(rxrpc_client_chan_disconnect,	"ChDisc") \
 	EM(rxrpc_client_chan_pass,		"ChPass") \
 	EM(rxrpc_client_chan_unstarted,		"ChUnst") \
+	EM(rxrpc_client_chan_wait_failed,	"ChWtFl") \
 	EM(rxrpc_client_cleanup,		"Clean ") \
 	EM(rxrpc_client_count,			"Count ") \
 	EM(rxrpc_client_discard,		"Discar") \
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index b2adfa825363..f307a05076e1 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 
 	ret = rxrpc_wait_for_channel(call, gfp);
 	if (ret < 0) {
+		trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
 		rxrpc_disconnect_client_call(call);
 		goto out;
 	}
@@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
  */
 void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 {
-	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_channel *chan = NULL;
 	struct rxrpc_net *rxnet = conn->params.local->rxnet;
-
-	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
-	call->conn = NULL;
+	unsigned int channel = -1;
+	u32 cid;
 
 	spin_lock(&conn->channel_lock);
 
+	cid = call->cid;
+	if (cid) {
+		channel = cid & RXRPC_CHANNELMASK;
+		chan = &conn->channels[channel];
+	}
+	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
+	call->conn = NULL;
+
 	/* Calls that have never actually been assigned a channel can simply be
 	 * discarded.  If the conn didn't get used either, it will follow
 	 * immediately unless someone else grabs it in the meantime.
@@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 		goto out;
 	}
 
-	ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+	if (rcu_access_pointer(chan->call) != call) {
+		spin_unlock(&conn->channel_lock);
+		BUG();
+	}
 
 	/* If a client call was exposed to the world, we save the result for
 	 * retransmission.

From ad6c9986bcb627c7c22b8f9e9a934becc27df87c Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 8 Mar 2019 16:40:57 +0100
Subject: [PATCH 38/72] vxlan: Fix GRO cells race condition between receive and
 link delete

If we receive a packet while deleting a VXLAN device, there's a chance
vxlan_rcv() is called at the same time as vxlan_dellink(). This is fine,
except that vxlan_dellink() should never ever touch stuff that's still in
use, such as the GRO cells list.

Otherwise, vxlan_rcv() crashes while queueing packets via
gro_cells_receive().

Move the gro_cells_destroy() to vxlan_uninit(), which runs after the RCU
grace period is elapsed and nothing needs the gro_cells anymore.

This is now done in the same way as commit 8e816df87997 ("geneve: Use GRO
cells infrastructure.") originally implemented for GENEVE.

Reported-by: Jianlin Shi <jishi@redhat.com>
Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a3c46d78d216..76abd31e8d56 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2767,6 +2767,8 @@ static void vxlan_uninit(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 
+	gro_cells_destroy(&vxlan->gro_cells);
+
 	vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
 
 	free_percpu(dev->tstats);
@@ -3942,7 +3944,6 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 
 	vxlan_flush(vxlan, true);
 
-	gro_cells_destroy(&vxlan->gro_cells);
 	list_del(&vxlan->next);
 	unregister_netdevice_queue(dev, head);
 }

From 89664c623617b1d34447a927ac7871ddf3db29d3 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 3 Mar 2019 17:54:53 +0800
Subject: [PATCH 39/72] sctp: sctp_sock_migrate() returns error if
 sctp_bind_addr_dup() fails

It should fail to create the new sk if sctp_bind_addr_dup() fails
when accepting or peeloff an association.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 533207dbeae9..44f2acb3d433 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -102,9 +102,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk);
 static int sctp_do_bind(struct sock *, union sctp_addr *, int);
 static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
-			      struct sctp_association *assoc,
-			      enum sctp_socket_type type);
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			     struct sctp_association *assoc,
+			     enum sctp_socket_type type);
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
@@ -4891,7 +4891,11 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
 	 */
-	sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+	error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+	if (error) {
+		sk_common_release(newsk);
+		newsk = NULL;
+	}
 
 out:
 	release_sock(sk);
@@ -5639,7 +5643,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
 	 */
-	sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	err = sctp_sock_migrate(sk, sock->sk, asoc,
+				SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	if (err) {
+		sock_release(sock);
+		sock = NULL;
+	}
 
 	*sockp = sock;
 
@@ -9171,9 +9180,9 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
 /* Populate the fields of the newsk from the oldsk and migrate the assoc
  * and its messages to the newsk.
  */
-static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
-			      struct sctp_association *assoc,
-			      enum sctp_socket_type type)
+static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			     struct sctp_association *assoc,
+			     enum sctp_socket_type type)
 {
 	struct sctp_sock *oldsp = sctp_sk(oldsk);
 	struct sctp_sock *newsp = sctp_sk(newsk);
@@ -9182,6 +9191,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	struct sk_buff *skb, *tmp;
 	struct sctp_ulpevent *event;
 	struct sctp_bind_hashbucket *head;
+	int err;
 
 	/* Migrate socket buffer sizes and all the socket level options to the
 	 * new socket.
@@ -9210,8 +9220,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	/* Copy the bind_addr list from the original endpoint to the new
 	 * endpoint so that we can handle restarts properly
 	 */
-	sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
-				&oldsp->ep->base.bind_addr, GFP_KERNEL);
+	err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
+				 &oldsp->ep->base.bind_addr, GFP_KERNEL);
+	if (err)
+		return err;
 
 	/* Move any messages in the old socket's receive queue that are for the
 	 * peeled off association to the new socket's receive queue.
@@ -9296,6 +9308,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	}
 
 	release_sock(newsk);
+
+	return 0;
 }
 
 

From 60208f79139af0e2f84747d04a2f3321f174a398 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 3 Mar 2019 17:54:54 +0800
Subject: [PATCH 40/72] sctp: move up sctp_auth_init_hmacs() in
 sctp_endpoint_init()

sctp_auth_init_hmacs() is called only when ep->auth_enable is set.
It better to move up sctp_auth_init_hmacs() and remove auth_enable
check in it and check auth_enable only once in sctp_endpoint_init().

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c        |  6 ------
 net/sctp/endpointola.c | 18 ++++++++++--------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 5b537613946f..39d72e58b8e5 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -471,12 +471,6 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
 	struct crypto_shash *tfm = NULL;
 	__u16   id;
 
-	/* If AUTH extension is disabled, we are done */
-	if (!ep->auth_enable) {
-		ep->auth_hmacs = NULL;
-		return 0;
-	}
-
 	/* If the transforms are already allocated, we are done */
 	if (ep->auth_hmacs)
 		return 0;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 40c7eb941bc9..0448b68fce74 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -107,6 +107,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 			auth_chunks->param_hdr.length =
 					htons(sizeof(struct sctp_paramhdr) + 2);
 		}
+
+		/* Allocate and initialize transorms arrays for supported
+		 * HMACs.
+		 */
+		err = sctp_auth_init_hmacs(ep, gfp);
+		if (err)
+			goto nomem;
 	}
 
 	/* Initialize the base structure. */
@@ -150,15 +157,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	INIT_LIST_HEAD(&ep->endpoint_shared_keys);
 	null_key = sctp_auth_shkey_create(0, gfp);
 	if (!null_key)
-		goto nomem;
+		goto nomem_shkey;
 
 	list_add(&null_key->key_list, &ep->endpoint_shared_keys);
 
-	/* Allocate and initialize transorms arrays for supported HMACs. */
-	err = sctp_auth_init_hmacs(ep, gfp);
-	if (err)
-		goto nomem_hmacs;
-
 	/* Add the null key to the endpoint shared keys list and
 	 * set the hmcas and chunks pointers.
 	 */
@@ -169,8 +171,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 
 	return ep;
 
-nomem_hmacs:
-	sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
+nomem_shkey:
+	sctp_auth_destroy_hmacs(ep->auth_hmacs);
 nomem:
 	/* Free all allocations */
 	kfree(auth_hmacs);

From c6f33e05225696fee3c901b7526b80f31848454e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 3 Mar 2019 17:54:55 +0800
Subject: [PATCH 41/72] sctp: call sctp_auth_init_hmacs() in
 sctp_sock_migrate()

New ep's auth_hmacs should be set if old ep's is set, in case that
net->sctp.auth_enable has been changed to 0 by users and new ep's
auth_hmacs couldn't be set in sctp_endpoint_init().

It can even crash kernel by doing:

  1. on server: sysctl -w net.sctp.auth_enable=1,
                sysctl -w net.sctp.addip_enable=1,
                sysctl -w net.sctp.addip_noauth_enable=0,
                listen() on server,
                sysctl -w net.sctp.auth_enable=0.
  2. on client: connect() to server.
  3. on server: accept() the asoc,
                sysctl -w net.sctp.auth_enable=1.
  4. on client: send() asconf packet to server.

The call trace:

  [  245.280251] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
  [  245.286872] RIP: 0010:sctp_auth_calculate_hmac+0xa3/0x140 [sctp]
  [  245.304572] Call Trace:
  [  245.305091]  <IRQ>
  [  245.311287]  sctp_sf_authenticate+0x110/0x160 [sctp]
  [  245.312311]  sctp_sf_eat_auth+0xf2/0x230 [sctp]
  [  245.313249]  sctp_do_sm+0x9a/0x2d0 [sctp]
  [  245.321483]  sctp_assoc_bh_rcv+0xed/0x1a0 [sctp]
  [  245.322495]  sctp_rcv+0xa66/0xc70 [sctp]

It's because the old ep->auth_hmacs wasn't copied to the new ep while
ep->auth_hmacs is used in sctp_auth_calculate_hmac() when processing
the incoming auth chunks, and it should have been done when migrating
sock.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 44f2acb3d433..6140471efd4b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9225,6 +9225,16 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	if (err)
 		return err;
 
+	/* New ep's auth_hmacs should be set if old ep's is set, in case
+	 * that net->sctp.auth_enable has been changed to 0 by users and
+	 * new ep's auth_hmacs couldn't be set in sctp_endpoint_init().
+	 */
+	if (oldsp->ep->auth_hmacs) {
+		err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL);
+		if (err)
+			return err;
+	}
+
 	/* Move any messages in the old socket's receive queue that are for the
 	 * peeled off association to the new socket's receive queue.
 	 */

From 634565f815561317f32191df57e11c05aa0b8297 Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Tue, 5 Mar 2019 09:29:23 +0100
Subject: [PATCH 42/72] net: ethernet: stmmac: manage Ethernet WoL for
 stm32mp157c.

Add glue codes to support magic packet on stm32mp157c

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-stm32.c | 30 ++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 7e2e79dedebf..d1cf145b8986 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -42,6 +42,7 @@ struct stm32_dwmac {
 	struct clk *clk_ethstp;
 	struct clk *syscfg_clk;
 	bool int_phyclk;	/* Clock from RCC to drive PHY */
+	int irq_pwr_wakeup;
 	u32 mode_reg;		/* MAC glue-logic mode register */
 	struct regmap *regmap;
 	u32 speed;
@@ -232,7 +233,9 @@ static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
 static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 			       struct device *dev)
 {
+	struct platform_device *pdev = to_platform_device(dev);
 	struct device_node *np = dev->of_node;
+	int err = 0;
 
 	dwmac->int_phyclk = of_property_read_bool(np, "st,int-phyclk");
 
@@ -260,7 +263,26 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 		return PTR_ERR(dwmac->syscfg_clk);
 	}
 
-	return 0;
+	/* Get IRQ information early to have an ability to ask for deferred
+	 * probe if needed before we went too far with resource allocation.
+	 */
+	dwmac->irq_pwr_wakeup = platform_get_irq_byname(pdev,
+							"stm32_pwr_wakeup");
+	if (!dwmac->int_phyclk && dwmac->irq_pwr_wakeup >= 0) {
+		err = device_init_wakeup(&pdev->dev, true);
+		if (err) {
+			dev_err(&pdev->dev, "Failed to init wake up irq\n");
+			return err;
+		}
+		err = dev_pm_set_dedicated_wake_irq(&pdev->dev,
+						    dwmac->irq_pwr_wakeup);
+		if (err) {
+			dev_err(&pdev->dev, "Failed to set wake up irq\n");
+			device_init_wakeup(&pdev->dev, false);
+		}
+		device_set_wakeup_enable(&pdev->dev, false);
+	}
+	return err;
 }
 
 static int stm32_dwmac_probe(struct platform_device *pdev)
@@ -326,9 +348,15 @@ static int stm32_dwmac_remove(struct platform_device *pdev)
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	int ret = stmmac_dvr_remove(&pdev->dev);
+	struct stm32_dwmac *dwmac = priv->plat->bsp_priv;
 
 	stm32_dwmac_clk_disable(priv->plat->bsp_priv);
 
+	if (dwmac->irq_pwr_wakeup >= 0) {
+		dev_pm_clear_wake_irq(&pdev->dev);
+		device_init_wakeup(&pdev->dev, false);
+	}
+
 	return ret;
 }
 

From 22947335c4a69bbac72e402bc26edac05a627612 Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Tue, 5 Mar 2019 09:29:24 +0100
Subject: [PATCH 43/72] net: ethernet: stmmac: update to support all PHY config
 for stm32mp157c.

Update glue codes to support all PHY config on stm32mp157c
 PHY_MODE	(MII,GMII, RMII, RGMII) and in normal, PHY wo crystal (25Mhz),
PHY wo crystal (50Mhz), No 125Mhz from PHY config.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-stm32.c | 107 ++++++++++++++----
 1 file changed, 86 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index d1cf145b8986..062a600fa5a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -25,9 +25,24 @@
 
 #define SYSCFG_MCU_ETH_MASK		BIT(23)
 #define SYSCFG_MP1_ETH_MASK		GENMASK(23, 16)
+#define SYSCFG_PMCCLRR_OFFSET		0x40
 
 #define SYSCFG_PMCR_ETH_CLK_SEL		BIT(16)
 #define SYSCFG_PMCR_ETH_REF_CLK_SEL	BIT(17)
+
+/*  Ethernet PHY interface selection in register SYSCFG Configuration
+ *------------------------------------------
+ * src	 |BIT(23)| BIT(22)| BIT(21)|BIT(20)|
+ *------------------------------------------
+ * MII   |   0	 |   0	  |   0    |   1   |
+ *------------------------------------------
+ * GMII  |   0	 |   0	  |   0    |   0   |
+ *------------------------------------------
+ * RGMII |   0	 |   0	  |   1	   |  n/a  |
+ *------------------------------------------
+ * RMII  |   1	 |   0	  |   0	   |  n/a  |
+ *------------------------------------------
+ */
 #define SYSCFG_PMCR_ETH_SEL_MII		BIT(20)
 #define SYSCFG_PMCR_ETH_SEL_RGMII	BIT(21)
 #define SYSCFG_PMCR_ETH_SEL_RMII	BIT(23)
@@ -35,15 +50,54 @@
 #define SYSCFG_MCU_ETH_SEL_MII		0
 #define SYSCFG_MCU_ETH_SEL_RMII		1
 
+/* STM32MP1 register definitions
+ *
+ * Below table summarizes the clock requirement and clock sources for
+ * supported phy interface modes.
+ * __________________________________________________________________________
+ *|PHY_MODE | Normal | PHY wo crystal|   PHY wo crystal   |No 125Mhz from PHY|
+ *|         |        |      25MHz    |        50MHz       |                  |
+ * ---------------------------------------------------------------------------
+ *|  MII    |	 -   |     eth-ck    |	      n/a	  |	  n/a        |
+ *|         |        |		     |                    |		     |
+ * ---------------------------------------------------------------------------
+ *|  GMII   |	 -   |     eth-ck    |	      n/a	  |	  n/a        |
+ *|         |        |               |                    |		     |
+ * ---------------------------------------------------------------------------
+ *| RGMII   |	 -   |     eth-ck    |	      n/a	  |  eth-ck (no pin) |
+ *|         |        |               |                    |  st,eth-clk-sel  |
+ * ---------------------------------------------------------------------------
+ *| RMII    |	 -   |     eth-ck    |	    eth-ck        |	  n/a        |
+ *|         |        |		     | st,eth-ref-clk-sel |		     |
+ * ---------------------------------------------------------------------------
+ *
+ * BIT(17) : set this bit in RMII mode when you have PHY without crystal 50MHz
+ * BIT(16) : set this bit in GMII/RGMII PHY when you do not want use 125Mhz
+ * from PHY
+ *-----------------------------------------------------
+ * src	 |         BIT(17)       |       BIT(16)      |
+ *-----------------------------------------------------
+ * MII   |           n/a	 |         n/a        |
+ *-----------------------------------------------------
+ * GMII  |           n/a         |   st,eth-clk-sel   |
+ *-----------------------------------------------------
+ * RGMII |           n/a         |   st,eth-clk-sel   |
+ *-----------------------------------------------------
+ * RMII  |   st,eth-ref-clk-sel	 |         n/a        |
+ *-----------------------------------------------------
+ *
+ */
+
 struct stm32_dwmac {
 	struct clk *clk_tx;
 	struct clk *clk_rx;
 	struct clk *clk_eth_ck;
 	struct clk *clk_ethstp;
 	struct clk *syscfg_clk;
-	bool int_phyclk;	/* Clock from RCC to drive PHY */
+	int eth_clk_sel_reg;
+	int eth_ref_clk_sel_reg;
 	int irq_pwr_wakeup;
-	u32 mode_reg;		/* MAC glue-logic mode register */
+	u32 mode_reg;		 /* MAC glue-logic mode register */
 	struct regmap *regmap;
 	u32 speed;
 	const struct stm32_ops *ops;
@@ -103,7 +157,7 @@ static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare)
 		if (ret)
 			return ret;
 
-		if (dwmac->int_phyclk) {
+		if (dwmac->clk_eth_ck) {
 			ret = clk_prepare_enable(dwmac->clk_eth_ck);
 			if (ret) {
 				clk_disable_unprepare(dwmac->syscfg_clk);
@@ -112,7 +166,7 @@ static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare)
 		}
 	} else {
 		clk_disable_unprepare(dwmac->syscfg_clk);
-		if (dwmac->int_phyclk)
+		if (dwmac->clk_eth_ck)
 			clk_disable_unprepare(dwmac->clk_eth_ck);
 	}
 	return ret;
@@ -122,7 +176,7 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 {
 	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
 	u32 reg = dwmac->mode_reg;
-	int val;
+	int val, ret;
 
 	switch (plat_dat->interface) {
 	case PHY_INTERFACE_MODE_MII:
@@ -131,19 +185,22 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 	case PHY_INTERFACE_MODE_GMII:
 		val = SYSCFG_PMCR_ETH_SEL_GMII;
-		if (dwmac->int_phyclk)
+		if (dwmac->eth_clk_sel_reg)
 			val |= SYSCFG_PMCR_ETH_CLK_SEL;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_GMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RMII:
 		val = SYSCFG_PMCR_ETH_SEL_RMII;
-		if (dwmac->int_phyclk)
+		if (dwmac->eth_ref_clk_sel_reg)
 			val |= SYSCFG_PMCR_ETH_REF_CLK_SEL;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
 		val = SYSCFG_PMCR_ETH_SEL_RGMII;
-		if (dwmac->int_phyclk)
+		if (dwmac->eth_clk_sel_reg)
 			val |= SYSCFG_PMCR_ETH_CLK_SEL;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
 		break;
@@ -154,6 +211,11 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 		return -EINVAL;
 	}
 
+	/* Need to update PMCCLRR (clear register) */
+	ret = regmap_write(dwmac->regmap, reg + SYSCFG_PMCCLRR_OFFSET,
+			   dwmac->ops->syscfg_eth_mask);
+
+	/* Update PMCSETR (set register) */
 	return regmap_update_bits(dwmac->regmap, reg,
 				 dwmac->ops->syscfg_eth_mask, val);
 }
@@ -181,7 +243,7 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat)
 	}
 
 	return regmap_update_bits(dwmac->regmap, reg,
-				 dwmac->ops->syscfg_eth_mask, val);
+				 dwmac->ops->syscfg_eth_mask, val << 23);
 }
 
 static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac)
@@ -237,22 +299,25 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 	struct device_node *np = dev->of_node;
 	int err = 0;
 
-	dwmac->int_phyclk = of_property_read_bool(np, "st,int-phyclk");
+	/* Gigabit Ethernet 125MHz clock selection. */
+	dwmac->eth_clk_sel_reg = of_property_read_bool(np, "st,eth-clk-sel");
 
-	/* Check if internal clk from RCC selected */
-	if (dwmac->int_phyclk) {
-		/*  Get ETH_CLK clocks */
-		dwmac->clk_eth_ck = devm_clk_get(dev, "eth-ck");
-		if (IS_ERR(dwmac->clk_eth_ck)) {
-			dev_err(dev, "No ETH CK clock provided...\n");
-			return PTR_ERR(dwmac->clk_eth_ck);
-		}
+	/* Ethernet 50Mhz RMII clock selection */
+	dwmac->eth_ref_clk_sel_reg =
+		of_property_read_bool(np, "st,eth-ref-clk-sel");
+
+	/*  Get ETH_CLK clocks */
+	dwmac->clk_eth_ck = devm_clk_get(dev, "eth-ck");
+	if (IS_ERR(dwmac->clk_eth_ck)) {
+		dev_warn(dev, "No phy clock provided...\n");
+		dwmac->clk_eth_ck = NULL;
 	}
 
 	/*  Clock used for low power mode */
 	dwmac->clk_ethstp = devm_clk_get(dev, "ethstp");
 	if (IS_ERR(dwmac->clk_ethstp)) {
-		dev_err(dev, "No ETH peripheral clock provided for CStop mode ...\n");
+		dev_err(dev,
+			"No ETH peripheral clock provided for CStop mode ...\n");
 		return PTR_ERR(dwmac->clk_ethstp);
 	}
 
@@ -268,7 +333,7 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 	 */
 	dwmac->irq_pwr_wakeup = platform_get_irq_byname(pdev,
 							"stm32_pwr_wakeup");
-	if (!dwmac->int_phyclk && dwmac->irq_pwr_wakeup >= 0) {
+	if (!dwmac->clk_eth_ck && dwmac->irq_pwr_wakeup >= 0) {
 		err = device_init_wakeup(&pdev->dev, true);
 		if (err) {
 			dev_err(&pdev->dev, "Failed to init wake up irq\n");
@@ -370,7 +435,7 @@ static int stm32mp1_suspend(struct stm32_dwmac *dwmac)
 
 	clk_disable_unprepare(dwmac->clk_tx);
 	clk_disable_unprepare(dwmac->syscfg_clk);
-	if (dwmac->int_phyclk)
+	if (dwmac->clk_eth_ck)
 		clk_disable_unprepare(dwmac->clk_eth_ck);
 
 	return ret;

From 830133daec63d450e702e6196cec9f0413164164 Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Tue, 5 Mar 2019 09:29:25 +0100
Subject: [PATCH 44/72] dt-bindings: net: stmmac: add phys config properties

Add properties to support all Phy config
 PHY_MODE	(MII,GMII, RMII, RGMII) and in normal, PHY wo crystal (25Mhz),
 PHY wo crystal (50Mhz), No 125Mhz from PHY config.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/stm32-dwmac.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
index 1341012722aa..3524e80c5e96 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
@@ -24,9 +24,9 @@ Required properties:
 	       encompases the glue register, and the offset of the control register.
 
 Optional properties:
-- clock-names:     For MPU family "mac-clk-ck" for PHY without quartz
-- st,int-phyclk (boolean) :  valid only where PHY do not have quartz and need to be clock
-	           by RCC
+- clock-names:     For MPU family "eth-ck" for PHY without quartz
+- st,eth-clk-sel (boolean) : set this property in RGMII PHY when you want to select RCC clock instead of ETH_CLK125.
+- st,eth-ref-clk-sel (boolean) :  set this property in RMII mode when you have PHY without crystal 50MHz and want to select RCC clock instead of ETH_REF_CLK.
 
 Example:
 

From 81311c03ab4dca83e4f4c678129b4327f2d41b40 Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Tue, 5 Mar 2019 09:29:26 +0100
Subject: [PATCH 45/72] net: ethernet: stmmac: add management of clk_csr
 property

In Documentation stmmac.txt there is possibility to
fixed CSR Clock range selection with property clk_csr.
This patch add the management of this property
For example to use it, add in your ethernet node DT:
	clk_csr = <3>;

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 2b800ce1d5bf..3031f2bf15d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -408,6 +408,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	/* Default to phy auto-detection */
 	plat->phy_addr = -1;
 
+	/* Get clk_csr from device tree */
+	of_property_read_u32(np, "clk_csr", &plat->clk_csr);
+
 	/* "snps,phy-addr" is not a standard property. Mark it as deprecated
 	 * and warn of its use. Remove this when phy node support is added.
 	 */

From 835667999019b71114bec4534fcd6c895ebdcbdb Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Tue, 5 Mar 2019 09:29:27 +0100
Subject: [PATCH 46/72] dt-bindings: net: stmmac: remove syscfg clock property

Syscfg clock is no more needed.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/stm32-dwmac.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
index 3524e80c5e96..a90eef11dc46 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
@@ -14,8 +14,7 @@ Required properties:
 - clock-names: Should be "stmmaceth" for the host clock.
 	       Should be "mac-clk-tx" for the MAC TX clock.
 	       Should be "mac-clk-rx" for the MAC RX clock.
-	       For MPU family need to add also "ethstp" for power mode clock and,
-	                                       "syscfg-clk" for SYSCFG clock.
+	       For MPU family need to add also "ethstp" for power mode clock
 - interrupt-names: Should contain a list of interrupt names corresponding to
            the interrupts in the interrupts property, if available.
 		   Should be "macirq" for the main MAC IRQ

From 5473f1be53c90252d3323e69e90a29379e608aaa Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Tue, 5 Mar 2019 09:29:28 +0100
Subject: [PATCH 47/72] ARM: dts: stm32: Add Ethernet support on stm32h7 SOC
 and activate it for eval and disco boards

Synopsys GMAC 4.10 is used. And Phy mode for eval and disco is RMII
with PHY SMSC LAN8742

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/boot/dts/stm32h743-pinctrl.dtsi | 15 +++++++++++++++
 arch/arm/boot/dts/stm32h743.dtsi         | 13 +++++++++++++
 arch/arm/boot/dts/stm32h743i-disco.dts   | 17 +++++++++++++++++
 arch/arm/boot/dts/stm32h743i-eval.dts    | 17 +++++++++++++++++
 4 files changed, 62 insertions(+)

diff --git a/arch/arm/boot/dts/stm32h743-pinctrl.dtsi b/arch/arm/boot/dts/stm32h743-pinctrl.dtsi
index 24be8e63dec8..980b2769caf9 100644
--- a/arch/arm/boot/dts/stm32h743-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stm32h743-pinctrl.dtsi
@@ -173,6 +173,21 @@ pins {
 				};
 			};
 
+			ethernet_rmii: rmii@0 {
+				pins {
+					pinmux = <STM32_PINMUX('G', 11, AF11)>,
+						 <STM32_PINMUX('G', 13, AF11)>,
+						 <STM32_PINMUX('G', 12, AF11)>,
+						 <STM32_PINMUX('C', 4, AF11)>,
+						 <STM32_PINMUX('C', 5, AF11)>,
+						 <STM32_PINMUX('A', 7, AF11)>,
+						 <STM32_PINMUX('C', 1, AF11)>,
+						 <STM32_PINMUX('A', 2, AF11)>,
+						 <STM32_PINMUX('A', 1, AF11)>;
+					slew-rate = <2>;
+				};
+			};
+
 			usart1_pins: usart1@0 {
 				pins1 {
 					pinmux = <STM32_PINMUX('B', 14, AF4)>; /* USART1_TX */
diff --git a/arch/arm/boot/dts/stm32h743.dtsi b/arch/arm/boot/dts/stm32h743.dtsi
index cbdd69ca9e7a..e103b29969ca 100644
--- a/arch/arm/boot/dts/stm32h743.dtsi
+++ b/arch/arm/boot/dts/stm32h743.dtsi
@@ -511,6 +511,19 @@ adc3: adc@0 {
 				status = "disabled";
 			};
 		};
+
+		mac: ethernet@40028000 {
+			compatible = "st,stm32-dwmac", "snps,dwmac-4.10a";
+			reg = <0x40028000 0x8000>;
+			reg-names = "stmmaceth";
+			interrupts = <61>;
+			interrupt-names = "macirq";
+			clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
+			clocks = <&rcc ETH1MAC_CK>, <&rcc ETH1TX_CK>, <&rcc ETH1RX_CK>;
+			st,syscon = <&syscfg 0x4>;
+			snps,pbl = <8>;
+			status = "disabled";
+		};
 	};
 };
 
diff --git a/arch/arm/boot/dts/stm32h743i-disco.dts b/arch/arm/boot/dts/stm32h743i-disco.dts
index 45e088c55741..83ef63d515a8 100644
--- a/arch/arm/boot/dts/stm32h743i-disco.dts
+++ b/arch/arm/boot/dts/stm32h743i-disco.dts
@@ -66,6 +66,23 @@ &clk_hse {
 	clock-frequency = <25000000>;
 };
 
+&mac {
+	status = "disabled";
+	pinctrl-0	= <&ethernet_rmii>;
+	pinctrl-names	= "default";
+	phy-mode	= "rmii";
+	phy-handle	= <&phy0>;
+
+	mdio0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "snps,dwmac-mdio";
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
+	};
+};
+
 &usart2 {
 	pinctrl-0 = <&usart2_pins>;
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/stm32h743i-eval.dts b/arch/arm/boot/dts/stm32h743i-eval.dts
index 3f8e0c4a998d..383c5bb03720 100644
--- a/arch/arm/boot/dts/stm32h743i-eval.dts
+++ b/arch/arm/boot/dts/stm32h743i-eval.dts
@@ -104,6 +104,23 @@ &rtc {
 	status = "okay";
 };
 
+&mac {
+	status = "disabled";
+	pinctrl-0	= <&ethernet_rmii>;
+	pinctrl-names	= "default";
+	phy-mode	= "rmii";
+	phy-handle	= <&phy0>;
+
+	mdio0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "snps,dwmac-mdio";
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
+	};
+};
+
 &usart1 {
 	pinctrl-0 = <&usart1_pins>;
 	pinctrl-names = "default";

From 243b4cdab981d7004bee56c38e18a29da32ef104 Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@tehnerd.com>
Date: Fri, 8 Mar 2019 05:18:14 +0000
Subject: [PATCH 48/72] bpf, libbpf: fixing leak when kernel does not support
 btf

We could end up in situation when we have object file w/ all btf
info, but kernel does not support btf yet. In this situation
currently libbpf just set obj->btf to NULL w/o freeing it first.
This patch is fixing it by making sure to run btf__free first.

Fixes: d29d87f7e612 ("btf: separate btf creation and loading")
Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f5eb60379c8d..d5b830d60601 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -838,6 +838,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			if (IS_ERR(obj->btf) || btf__load(obj->btf)) {
 				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
 					   BTF_ELF_SEC, PTR_ERR(obj->btf));
+				if (!IS_ERR(obj->btf))
+					btf__free(obj->btf);
 				obj->btf = NULL;
 			}
 		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {

From f54ba391d88f5a5d032175b4c308c176e34b80b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Fri, 8 Mar 2019 08:57:26 +0100
Subject: [PATCH 49/72] xsk: fix to reject invalid flags in xsk_bind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Passing a non-existing flag in the sxdp_flags member of struct
sockaddr_xdp was, incorrectly, silently ignored. This patch addresses
that behavior, and rejects any non-existing flags.

We have examined existing user space code, and to our best knowledge,
no one is relying on the current incorrect behavior. AF_XDP is still
in its infancy, so from our perspective, the risk of breakage is very
low, and addressing this problem now is important.

Fixes: 965a99098443 ("xsk: add support for bind for Rx")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6697084e3fdf..a14e8864e4fa 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -407,6 +407,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	if (sxdp->sxdp_family != AF_XDP)
 		return -EINVAL;
 
+	flags = sxdp->sxdp_flags;
+	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+		return -EINVAL;
+
 	mutex_lock(&xs->mutex);
 	if (xs->dev) {
 		err = -EBUSY;
@@ -425,7 +429,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	}
 
 	qid = sxdp->sxdp_queue_id;
-	flags = sxdp->sxdp_flags;
 
 	if (flags & XDP_SHARED_UMEM) {
 		struct xdp_sock *umem_xs;

From c57b557b644da624982c36b74f608cdb7b902868 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Fri, 8 Mar 2019 08:57:27 +0100
Subject: [PATCH 50/72] xsk: fix to reject invalid options in Tx descriptor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Passing a non-existing option in the options member of struct
xdp_desc was, incorrectly, silently ignored. This patch addresses
that behavior, and drops any Tx descriptor with non-existing options.

We have examined existing user space code, and to our best knowledge,
no one is relying on the current incorrect behavior. AF_XDP is still
in its infancy, so from our perspective, the risk of breakage is very
low, and addressing this problem now is important.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk_queue.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index bcb5cbb40419..610c0bdc0c2b 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -174,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
 	if (!xskq_is_valid_addr(q, d->addr))
 		return false;
 
-	if (((d->addr + d->len) & q->chunk_mask) !=
-	    (d->addr & q->chunk_mask)) {
+	if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
+	    d->options) {
 		q->invalid_descs++;
 		return false;
 	}

From 71b91a506bb05f9aef3acd57af2e835d85721942 Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Fri, 8 Mar 2019 01:45:51 -0500
Subject: [PATCH 51/72] bpf: fix warning about using plain integer as NULL

Sparse warning below:

sudo make C=2 CF=-D__CHECK_ENDIAN__ M=net/bpf/
CHECK   net/bpf//test_run.c
net/bpf//test_run.c:19:77: warning: Using plain integer as NULL pointer
./include/linux/bpf-cgroup.h:295:77: warning: Using plain integer as NULL pointer

Fixes: 8bad74f9840f ("bpf: extend cgroup bpf core to allow multiple cgroup storage types")
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 net/bpf/test_run.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 695b2a880d9a..a4c644c1c091 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -292,7 +292,7 @@ static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
 					      struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
 static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index da7051d62727..fab142b796ef 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -16,7 +16,7 @@
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 			u32 *retval, u32 *time)
 {
-	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
 	enum bpf_cgroup_storage_type stype;
 	u64 time_start, time_spent = 0;
 	int ret = 0;

From 161e613755e93c45cc47e75ab046f0f8de9e6d49 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Tue, 5 Mar 2019 11:35:54 -0300
Subject: [PATCH 52/72] net: add missing documentation in linux/skbuff.h

This patch adds missing documentation for some inline functions on
linux/skbuff.h. The patch is incomplete and a lot more can be added,
just wondering if it's of interest of the netdev developers.

Also fixed some whitespaces.

Signed-off-by: Pedro Tammela <pctammela@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 64 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 27beb549ffbe..730b333be591 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -327,26 +327,49 @@ struct skb_frag_struct {
 #endif
 };
 
+/**
+ * skb_frag_size - Returns the size of a skb fragment
+ * @frag: skb fragment
+ */
 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 {
 	return frag->size;
 }
 
+/**
+ * skb_frag_size_set - Sets the size of a skb fragment
+ * @frag: skb fragment
+ * @size: size of fragment
+ */
 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
 {
 	frag->size = size;
 }
 
+/**
+ * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
+ * @frag: skb fragment
+ * @delta: value to add
+ */
 static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
 {
 	frag->size += delta;
 }
 
+/**
+ * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
+ * @frag: skb fragment
+ * @delta: value to subtract
+ */
 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
 {
 	frag->size -= delta;
 }
 
+/**
+ * skb_frag_must_loop - Test if %p is a high memory page
+ * @p: fragment's page
+ */
 static inline bool skb_frag_must_loop(struct page *p)
 {
 #if defined(CONFIG_HIGHMEM)
@@ -590,7 +613,7 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
-/** 
+/**
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
  *	@prev: Previous buffer in list
@@ -648,7 +671,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  *	@dst_pending_confirm: need to confirm neighbour
  *	@decrypted: Decrypted SKB
-  *	@napi_id: id of the NAPI struct this skb came from
+ *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
  *	@vlan_proto: vlan encapsulation protocol
@@ -883,7 +906,10 @@ struct sk_buff {
 #define SKB_ALLOC_RX		0x02
 #define SKB_ALLOC_NAPI		0x04
 
-/* Returns true if the skb was allocated from PFMEMALLOC reserves */
+/**
+ * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves
+ * @skb: buffer
+ */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 {
 	return unlikely(skb->pfmemalloc);
@@ -905,7 +931,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb)
  */
 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
 {
-	/* If refdst was not refcounted, check we still are in a 
+	/* If refdst was not refcounted, check we still are in a
 	 * rcu_read_lock section
 	 */
 	WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
@@ -952,6 +978,10 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb)
 	return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
 }
 
+/**
+ * skb_rtable - Returns the skb &rtable
+ * @skb: buffer
+ */
 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 {
 	return (struct rtable *)skb_dst(skb);
@@ -966,6 +996,10 @@ static inline bool skb_pkt_type_ok(u32 ptype)
 	return ptype <= PACKET_OTHERHOST;
 }
 
+/**
+ * skb_napi_id - Returns the skb's NAPI id
+ * @skb: buffer
+ */
 static inline unsigned int skb_napi_id(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -975,7 +1009,12 @@ static inline unsigned int skb_napi_id(const struct sk_buff *skb)
 #endif
 }
 
-/* decrement the reference count and return true if we can free the skb */
+/**
+ * skb_unref - decrement the skb's reference count
+ * @skb: buffer
+ *
+ * Returns true if we can free the skb.
+ */
 static inline bool skb_unref(struct sk_buff *skb)
 {
 	if (unlikely(!skb))
@@ -1005,6 +1044,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
 			    int node);
 struct sk_buff *__build_skb(void *data, unsigned int frag_size);
 struct sk_buff *build_skb(void *data, unsigned int frag_size);
+
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @priority: allocation mask
+ *
+ * This function is a convenient wrapper around __alloc_skb().
+ */
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
@@ -1047,6 +1094,13 @@ static inline bool skb_fclone_busy(const struct sock *sk,
 	       fclones->skb2.sk == sk;
 }
 
+/**
+ * alloc_skb_fclone - allocate a network buffer from fclone cache
+ * @size: size to allocate
+ * @priority: allocation mask
+ *
+ * This function is a convenient wrapper around __alloc_skb().
+ */
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {

From d394d33bee22421b39a0bcdc51ca6d68ba308625 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Wed, 6 Mar 2019 11:26:37 +0800
Subject: [PATCH 53/72] net: hns3: add dma_rmb() for rx description

HW can not guarantee complete write desc->rx.size, even though
HNS3_RXD_VLD_B has been set. Driver needs to add dma_rmb()
instruction to make sure desc->rx.size is always valid.

Fixes: e55970950556 ("net: hns3: Add handling of GRO Pkts not fully RX'ed in NAPI poll")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 1e4efc47c7a5..0d1ae15a5927 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2472,6 +2472,8 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
 		desc = &ring->desc[ring->next_to_clean];
 		desc_cb = &ring->desc_cb[ring->next_to_clean];
 		bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+		/* make sure HW write desc complete */
+		dma_rmb();
 		if (!(bd_base_info & BIT(HNS3_RXD_VLD_B)))
 			return -ENXIO;
 

From f98ec788511b5e06b1ca668d380d42cd6742a75a Mon Sep 17 00:00:00 2001
From: Litao Jiao <jiaolitao@raisecom.com>
Date: Wed, 6 Mar 2019 12:01:48 +0800
Subject: [PATCH 54/72] vxlan: do not need BH again in vxlan_cleanup()

vxlan_cleanup() is a timer callback, it is already
and only running in BH context.

Signed-off-by: Litao Jiao <jiaolitao@raisecom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 76abd31e8d56..7610c51b33a0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2690,7 +2690,7 @@ static void vxlan_cleanup(struct timer_list *t)
 	for (h = 0; h < FDB_HASH_SIZE; ++h) {
 		struct hlist_node *p, *n;
 
-		spin_lock_bh(&vxlan->hash_lock);
+		spin_lock(&vxlan->hash_lock);
 		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
 			struct vxlan_fdb *f
 				= container_of(p, struct vxlan_fdb, hlist);
@@ -2712,7 +2712,7 @@ static void vxlan_cleanup(struct timer_list *t)
 			} else if (time_before(timeout, next_timer))
 				next_timer = timeout;
 		}
-		spin_unlock_bh(&vxlan->hash_lock);
+		spin_unlock(&vxlan->hash_lock);
 	}
 
 	mod_timer(&vxlan->age_timer, next_timer);

From 6d2b0f02f5a07a4bf02e4cbc90d7eaa85cac2986 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 6 Mar 2019 14:46:27 +0800
Subject: [PATCH 55/72] connector: fix unsafe usage of ->real_parent

proc_exit_connector() uses ->real_parent lockless. This is not
safe that its parent can go away at any moment, so use RCU to
protect it, and ensure that this task is not released.

[  747.624551] ==================================================================
[  747.632946] BUG: KASAN: use-after-free in proc_exit_connector+0x1f7/0x310
[  747.640686] Read of size 4 at addr ffff88a0276988e0 by task sshd/2882
[  747.648032]
[  747.649804] CPU: 11 PID: 2882 Comm: sshd Tainted: G            E     4.19.26-rc2 #11
[  747.658629] Hardware name: IBM x3550M4 -[7914OFV]-/00AM544, BIOS -[D7E142BUS-1.71]- 07/31/2014
[  747.668419] Call Trace:
[  747.671269]  dump_stack+0xf0/0x19b
[  747.675186]  ? show_regs_print_info+0x5/0x5
[  747.679988]  ? kmsg_dump_rewind_nolock+0x59/0x59
[  747.685302]  print_address_description+0x6a/0x270
[  747.691162]  kasan_report+0x258/0x380
[  747.695835]  ? proc_exit_connector+0x1f7/0x310
[  747.701402]  proc_exit_connector+0x1f7/0x310
[  747.706767]  ? proc_coredump_connector+0x2d0/0x2d0
[  747.712715]  ? _raw_write_unlock_irq+0x29/0x50
[  747.718270]  ? _raw_write_unlock_irq+0x29/0x50
[  747.723820]  ? ___preempt_schedule+0x16/0x18
[  747.729193]  ? ___preempt_schedule+0x16/0x18
[  747.734574]  do_exit+0xa11/0x14f0
[  747.738880]  ? mm_update_next_owner+0x590/0x590
[  747.744525]  ? debug_show_all_locks+0x3c0/0x3c0
[  747.761448]  ? ktime_get_coarse_real_ts64+0xeb/0x1c0
[  747.767589]  ? lockdep_hardirqs_on+0x1a6/0x290
[  747.773154]  ? check_chain_key+0x139/0x1f0
[  747.778345]  ? check_flags.part.35+0x240/0x240
[  747.783908]  ? __lock_acquire+0x2300/0x2300
[  747.789171]  ? _raw_spin_unlock_irqrestore+0x59/0x70
[  747.795316]  ? _raw_spin_unlock_irqrestore+0x59/0x70
[  747.801457]  ? do_raw_spin_unlock+0x10f/0x1e0
[  747.806914]  ? do_raw_spin_trylock+0x120/0x120
[  747.812481]  ? preempt_count_sub+0x14/0xc0
[  747.817645]  ? _raw_spin_unlock+0x2e/0x50
[  747.822708]  ? __handle_mm_fault+0x12db/0x1fa0
[  747.828367]  ? __pmd_alloc+0x2d0/0x2d0
[  747.833143]  ? check_noncircular+0x50/0x50
[  747.838309]  ? match_held_lock+0x7f/0x340
[  747.843380]  ? check_noncircular+0x50/0x50
[  747.848561]  ? handle_mm_fault+0x21a/0x5f0
[  747.853730]  ? check_flags.part.35+0x240/0x240
[  747.859290]  ? check_chain_key+0x139/0x1f0
[  747.864474]  ? __do_page_fault+0x40f/0x760
[  747.869655]  ? __audit_syscall_entry+0x4b/0x1f0
[  747.875319]  ? syscall_trace_enter+0x1d5/0x7b0
[  747.880877]  ? trace_raw_output_preemptirq_template+0x90/0x90
[  747.887895]  ? trace_raw_output_sys_exit+0x80/0x80
[  747.893860]  ? up_read+0x3b/0x90
[  747.898142]  ? stop_critical_timings+0x260/0x260
[  747.903909]  do_group_exit+0xe0/0x1c0
[  747.908591]  ? __x64_sys_exit+0x30/0x30
[  747.913460]  ? trace_raw_output_preemptirq_template+0x90/0x90
[  747.920485]  ? tracer_hardirqs_on+0x270/0x270
[  747.925956]  __x64_sys_exit_group+0x28/0x30
[  747.931214]  do_syscall_64+0x117/0x400
[  747.935988]  ? syscall_return_slowpath+0x2f0/0x2f0
[  747.941931]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  747.947788]  ? trace_hardirqs_on_caller+0x1d0/0x1d0
[  747.953838]  ? lockdep_sys_exit+0x16/0x8e
[  747.958915]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  747.964784]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  747.971021] RIP: 0033:0x7f572f154c68
[  747.975606] Code: Bad RIP value.
[  747.979791] RSP: 002b:00007ffed2dfaa58 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
[  747.989324] RAX: ffffffffffffffda RBX: 00007f572f431840 RCX: 00007f572f154c68
[  747.997910] RDX: 0000000000000001 RSI: 000000000000003c RDI: 0000000000000001
[  748.006495] RBP: 0000000000000001 R08: 00000000000000e7 R09: fffffffffffffee0
[  748.015079] R10: 00007f572f4387e8 R11: 0000000000000246 R12: 00007f572f431840
[  748.023664] R13: 000055a7f90f2c50 R14: 000055a7f96e2310 R15: 000055a7f96e2310
[  748.032287]
[  748.034509] Allocated by task 2300:
[  748.038982]  kasan_kmalloc+0xa0/0xd0
[  748.043562]  kmem_cache_alloc_node+0xf5/0x2e0
[  748.049018]  copy_process+0x1781/0x4790
[  748.053884]  _do_fork+0x166/0x9a0
[  748.058163]  do_syscall_64+0x117/0x400
[  748.062943]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  748.069180]
[  748.071405] Freed by task 15395:
[  748.075591]  __kasan_slab_free+0x130/0x180
[  748.080752]  kmem_cache_free+0xc2/0x310
[  748.085619]  free_task+0xea/0x130
[  748.089901]  __put_task_struct+0x177/0x230
[  748.095063]  finish_task_switch+0x51b/0x5d0
[  748.100315]  __schedule+0x506/0xfa0
[  748.104791]  schedule+0xca/0x260
[  748.108978]  futex_wait_queue_me+0x27e/0x420
[  748.114333]  futex_wait+0x251/0x550
[  748.118814]  do_futex+0x75b/0xf80
[  748.123097]  __x64_sys_futex+0x231/0x2a0
[  748.128065]  do_syscall_64+0x117/0x400
[  748.132835]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  748.139066]
[  748.141289] The buggy address belongs to the object at ffff88a027698000
[  748.141289]  which belongs to the cache task_struct of size 12160
[  748.156589] The buggy address is located 2272 bytes inside of
[  748.156589]  12160-byte region [ffff88a027698000, ffff88a02769af80)
[  748.171114] The buggy address belongs to the page:
[  748.177055] page:ffffea00809da600 count:1 mapcount:0 mapping:ffff888107d01e00 index:0x0 compound_mapcount: 0
[  748.189136] flags: 0x57ffffc0008100(slab|head)
[  748.194688] raw: 0057ffffc0008100 ffffea00809a3200 0000000300000003 ffff888107d01e00
[  748.204424] raw: 0000000000000000 0000000000020002 00000001ffffffff 0000000000000000
[  748.214146] page dumped because: kasan: bad access detected
[  748.220976]
[  748.223197] Memory state around the buggy address:
[  748.229128]  ffff88a027698780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.238271]  ffff88a027698800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.247414] >ffff88a027698880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.256564]                                                        ^
[  748.264267]  ffff88a027698900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.273493]  ffff88a027698980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.282630] ==================================================================

Fixes: b086ff87251b4a4 ("connector: add parent pid and tgid to coredump and exit events")
Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/cn_proc.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index ed5e42461094..ad48fd52cb53 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -250,6 +250,7 @@ void proc_coredump_connector(struct task_struct *task)
 {
 	struct cn_msg *msg;
 	struct proc_event *ev;
+	struct task_struct *parent;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 
 	if (atomic_read(&proc_event_num_listeners) < 1)
@@ -262,8 +263,14 @@ void proc_coredump_connector(struct task_struct *task)
 	ev->what = PROC_EVENT_COREDUMP;
 	ev->event_data.coredump.process_pid = task->pid;
 	ev->event_data.coredump.process_tgid = task->tgid;
-	ev->event_data.coredump.parent_pid = task->real_parent->pid;
-	ev->event_data.coredump.parent_tgid = task->real_parent->tgid;
+
+	rcu_read_lock();
+	if (pid_alive(task)) {
+		parent = rcu_dereference(task->real_parent);
+		ev->event_data.coredump.parent_pid = parent->pid;
+		ev->event_data.coredump.parent_tgid = parent->tgid;
+	}
+	rcu_read_unlock();
 
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
@@ -276,6 +283,7 @@ void proc_exit_connector(struct task_struct *task)
 {
 	struct cn_msg *msg;
 	struct proc_event *ev;
+	struct task_struct *parent;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 
 	if (atomic_read(&proc_event_num_listeners) < 1)
@@ -290,8 +298,14 @@ void proc_exit_connector(struct task_struct *task)
 	ev->event_data.exit.process_tgid = task->tgid;
 	ev->event_data.exit.exit_code = task->exit_code;
 	ev->event_data.exit.exit_signal = task->exit_signal;
-	ev->event_data.exit.parent_pid = task->real_parent->pid;
-	ev->event_data.exit.parent_tgid = task->real_parent->tgid;
+
+	rcu_read_lock();
+	if (pid_alive(task)) {
+		parent = rcu_dereference(task->real_parent);
+		ev->event_data.exit.parent_pid = parent->pid;
+		ev->event_data.exit.parent_tgid = parent->tgid;
+	}
+	rcu_read_unlock();
 
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */

From 4c404ce23358d5d8fbdeb7a6021a9b33d3c3c167 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adalbert=20Laz=C4=83r?= <alazar@bitdefender.com>
Date: Wed, 6 Mar 2019 12:13:53 +0200
Subject: [PATCH 56/72] vsock/virtio: fix kernel panic from
 virtio_transport_reset_no_sock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous to commit 22b5c0b63f32 ("vsock/virtio: fix kernel panic
after device hot-unplug"), vsock_core_init() was called from
virtio_vsock_probe(). Now, virtio_transport_reset_no_sock() can be called
before vsock_core_init() has the chance to run.

[Wed Feb 27 14:17:09 2019] BUG: unable to handle kernel NULL pointer dereference at 0000000000000110
[Wed Feb 27 14:17:09 2019] #PF error: [normal kernel read fault]
[Wed Feb 27 14:17:09 2019] PGD 0 P4D 0
[Wed Feb 27 14:17:09 2019] Oops: 0000 [#1] SMP PTI
[Wed Feb 27 14:17:09 2019] CPU: 3 PID: 59 Comm: kworker/3:1 Not tainted 5.0.0-rc7-390-generic-hvi #390
[Wed Feb 27 14:17:09 2019] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[Wed Feb 27 14:17:09 2019] Workqueue: virtio_vsock virtio_transport_rx_work [vmw_vsock_virtio_transport]
[Wed Feb 27 14:17:09 2019] RIP: 0010:virtio_transport_reset_no_sock+0x8c/0xc0 [vmw_vsock_virtio_transport_common]
[Wed Feb 27 14:17:09 2019] Code: 35 8b 4f 14 48 8b 57 08 31 f6 44 8b 4f 10 44 8b 07 48 8d 7d c8 e8 84 f8 ff ff 48 85 c0 48 89 c3 74 2a e8 f7 31 03 00 48 89 df <48> 8b 80 10 01 00 00 e8 68 fb 69 ed 48 8b 75 f0 65 48 33 34 25 28
[Wed Feb 27 14:17:09 2019] RSP: 0018:ffffb42701ab7d40 EFLAGS: 00010282
[Wed Feb 27 14:17:09 2019] RAX: 0000000000000000 RBX: ffff9d79637ee080 RCX: 0000000000000003
[Wed Feb 27 14:17:09 2019] RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff9d79637ee080
[Wed Feb 27 14:17:09 2019] RBP: ffffb42701ab7d78 R08: ffff9d796fae70e0 R09: ffff9d796f403500
[Wed Feb 27 14:17:09 2019] R10: ffffb42701ab7d90 R11: 0000000000000000 R12: ffff9d7969d09240
[Wed Feb 27 14:17:09 2019] R13: ffff9d79624e6840 R14: ffff9d7969d09318 R15: ffff9d796d48ff80
[Wed Feb 27 14:17:09 2019] FS:  0000000000000000(0000) GS:ffff9d796fac0000(0000) knlGS:0000000000000000
[Wed Feb 27 14:17:09 2019] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[Wed Feb 27 14:17:09 2019] CR2: 0000000000000110 CR3: 0000000427f22000 CR4: 00000000000006e0
[Wed Feb 27 14:17:09 2019] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[Wed Feb 27 14:17:09 2019] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[Wed Feb 27 14:17:09 2019] Call Trace:
[Wed Feb 27 14:17:09 2019]  virtio_transport_recv_pkt+0x63/0x820 [vmw_vsock_virtio_transport_common]
[Wed Feb 27 14:17:09 2019]  ? kfree+0x17e/0x190
[Wed Feb 27 14:17:09 2019]  ? detach_buf_split+0x145/0x160
[Wed Feb 27 14:17:09 2019]  ? __switch_to_asm+0x40/0x70
[Wed Feb 27 14:17:09 2019]  virtio_transport_rx_work+0xa0/0x106 [vmw_vsock_virtio_transport]
[Wed Feb 27 14:17:09 2019] NET: Registered protocol family 40
[Wed Feb 27 14:17:09 2019]  process_one_work+0x167/0x410
[Wed Feb 27 14:17:09 2019]  worker_thread+0x4d/0x460
[Wed Feb 27 14:17:09 2019]  kthread+0x105/0x140
[Wed Feb 27 14:17:09 2019]  ? rescuer_thread+0x360/0x360
[Wed Feb 27 14:17:09 2019]  ? kthread_destroy_worker+0x50/0x50
[Wed Feb 27 14:17:09 2019]  ret_from_fork+0x35/0x40
[Wed Feb 27 14:17:09 2019] Modules linked in: vmw_vsock_virtio_transport vmw_vsock_virtio_transport_common input_leds vsock serio_raw i2c_piix4 mac_hid qemu_fw_cfg autofs4 cirrus ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_net psmouse drm net_failover pata_acpi virtio_blk failover floppy

Fixes: 22b5c0b63f32 ("vsock/virtio: fix kernel panic after device hot-unplug")
Reported-by: Alexandru Herghelegiu <aherghelegiu@bitdefender.com>
Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
Co-developed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 3ae3a33da70b..602715fc9a75 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -662,6 +662,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
  */
 static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
 {
+	const struct virtio_transport *t;
+	struct virtio_vsock_pkt *reply;
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_RST,
 		.type = le16_to_cpu(pkt->hdr.type),
@@ -672,15 +674,21 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
 	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
 		return 0;
 
-	pkt = virtio_transport_alloc_pkt(&info, 0,
-					 le64_to_cpu(pkt->hdr.dst_cid),
-					 le32_to_cpu(pkt->hdr.dst_port),
-					 le64_to_cpu(pkt->hdr.src_cid),
-					 le32_to_cpu(pkt->hdr.src_port));
-	if (!pkt)
+	reply = virtio_transport_alloc_pkt(&info, 0,
+					   le64_to_cpu(pkt->hdr.dst_cid),
+					   le32_to_cpu(pkt->hdr.dst_port),
+					   le64_to_cpu(pkt->hdr.src_cid),
+					   le32_to_cpu(pkt->hdr.src_port));
+	if (!reply)
 		return -ENOMEM;
 
-	return virtio_transport_get_ops()->send_pkt(pkt);
+	t = virtio_transport_get_ops();
+	if (!t) {
+		virtio_transport_free_pkt(reply);
+		return -ENOTCONN;
+	}
+
+	return t->send_pkt(reply);
 }
 
 static void virtio_transport_wait_close(struct sock *sk, long timeout)

From 81bf7bbeabd241326f4edc97f4f5ba366f21cbe0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 6 Mar 2019 12:05:49 +0100
Subject: [PATCH 57/72] vhost: silence an unused-variable warning

On some architectures, the MMU can be disabled, leading to access_ok()
becoming an empty macro that does not evaluate its size argument,
which in turn produces an unused-variable warning:

drivers/vhost/vhost.c:1191:9: error: unused variable 's' [-Werror,-Wunused-variable]
        size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

Mark the variable as __maybe_unused to shut up that warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index a2e5dc7716e2..5ace833de746 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1188,7 +1188,7 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			 struct vring_used __user *used)
 
 {
-	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+	size_t s __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
 	return access_ok(desc, num * sizeof *desc) &&
 	       access_ok(avail,

From b62989fc4ea27863e7aef00c93a10118324d3ed0 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Wed, 6 Mar 2019 17:50:43 +0200
Subject: [PATCH 58/72] net: sched: fix potential use-after-free in
 __tcf_chain_put()

When used with unlocked classifier that have filters attached to actions
with goto chain, __tcf_chain_put() for last non action reference can race
with calls to same function from action cleanup code that releases last
action reference. In this case action cleanup handler could free the chain
if it executes after all references to chain were released, but before all
concurrent users finished using it. Modify __tcf_chain_put() to only access
tcf_chain fields when holding block->lock. Remove local variables that were
used to cache some tcf_chain fields and are no longer needed because their
values can now be obtained directly from chain under block->lock
protection.

Fixes: 726d061286ce ("net: sched: prevent insertion of new classifiers during chain flush")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 478095d50f95..2c2aac4ac721 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -470,10 +470,9 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
 {
 	struct tcf_block *block = chain->block;
 	const struct tcf_proto_ops *tmplt_ops;
-	bool is_last, free_block = false;
+	bool free_block = false;
 	unsigned int refcnt;
 	void *tmplt_priv;
-	u32 chain_index;
 
 	mutex_lock(&block->lock);
 	if (explicitly_created) {
@@ -492,23 +491,21 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
 	 * save these to temporary variables.
 	 */
 	refcnt = --chain->refcnt;
-	is_last = refcnt - chain->action_refcnt == 0;
 	tmplt_ops = chain->tmplt_ops;
 	tmplt_priv = chain->tmplt_priv;
-	chain_index = chain->index;
-
-	if (refcnt == 0)
-		free_block = tcf_chain_detach(chain);
-	mutex_unlock(&block->lock);
 
 	/* The last dropped non-action reference will trigger notification. */
-	if (is_last && !by_act) {
-		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
+	if (refcnt - chain->action_refcnt == 0 && !by_act) {
+		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
 				       block, NULL, 0, 0, false);
 		/* Last reference to chain, no need to lock. */
 		chain->flushing = false;
 	}
 
+	if (refcnt == 0)
+		free_block = tcf_chain_detach(chain);
+	mutex_unlock(&block->lock);
+
 	if (refcnt == 0) {
 		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
 		tcf_chain_destroy(chain, free_block);

From 5355ed6388e23b69a00d48398a68d022135e6486 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 6 Mar 2019 10:41:00 -0800
Subject: [PATCH 59/72] fou, fou6: avoid uninit-value in gue_err() and
 gue6_err()

My prior commit missed the fact that these functions
were using udp_hdr() (aka skb_transport_header())
to get access to GUE header.

Since pskb_transport_may_pull() does not exist yet, we have to add
transport_offset to our pskb_may_pull() calls.

BUG: KMSAN: uninit-value in gue_err+0x514/0xfa0 net/ipv4/fou.c:1032
CPU: 1 PID: 10648 Comm: syz-executor.1 Not tainted 5.0.0+ #11
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x173/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600
 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313
 gue_err+0x514/0xfa0 net/ipv4/fou.c:1032
 __udp4_lib_err_encap_no_sk net/ipv4/udp.c:571 [inline]
 __udp4_lib_err_encap net/ipv4/udp.c:626 [inline]
 __udp4_lib_err+0x12e6/0x1d40 net/ipv4/udp.c:665
 udp_err+0x74/0x90 net/ipv4/udp.c:737
 icmp_socket_deliver net/ipv4/icmp.c:767 [inline]
 icmp_unreach+0xb65/0x1070 net/ipv4/icmp.c:884
 icmp_rcv+0x11a1/0x1950 net/ipv4/icmp.c:1066
 ip_protocol_deliver_rcu+0x584/0xbb0 net/ipv4/ip_input.c:208
 ip_local_deliver_finish net/ipv4/ip_input.c:234 [inline]
 NF_HOOK include/linux/netfilter.h:289 [inline]
 ip_local_deliver+0x624/0x7b0 net/ipv4/ip_input.c:255
 dst_input include/net/dst.h:450 [inline]
 ip_rcv_finish net/ipv4/ip_input.c:414 [inline]
 NF_HOOK include/linux/netfilter.h:289 [inline]
 ip_rcv+0x6bd/0x740 net/ipv4/ip_input.c:524
 __netif_receive_skb_one_core net/core/dev.c:4973 [inline]
 __netif_receive_skb net/core/dev.c:5083 [inline]
 process_backlog+0x756/0x10e0 net/core/dev.c:5923
 napi_poll net/core/dev.c:6346 [inline]
 net_rx_action+0x78b/0x1a60 net/core/dev.c:6412
 __do_softirq+0x53f/0x93a kernel/softirq.c:293
 invoke_softirq kernel/softirq.c:375 [inline]
 irq_exit+0x214/0x250 kernel/softirq.c:416
 exiting_irq+0xe/0x10 arch/x86/include/asm/apic.h:536
 smp_apic_timer_interrupt+0x48/0x70 arch/x86/kernel/apic/apic.c:1064
 apic_timer_interrupt+0x2e/0x40 arch/x86/entry/entry_64.S:814
 </IRQ>
RIP: 0010:finish_lock_switch+0x2b/0x40 kernel/sched/core.c:2597
Code: 48 89 e5 53 48 89 fb e8 63 e7 95 00 8b b8 88 0c 00 00 48 8b 00 48 85 c0 75 12 48 89 df e8 dd db 95 00 c6 00 00 c6 03 00 fb 5b <5d> c3 e8 4e e6 95 00 eb e7 66 90 66 2e 0f 1f 84 00 00 00 00 00 55
RSP: 0018:ffff888081a0fc80 EFLAGS: 00000296 ORIG_RAX: ffffffffffffff13
RAX: ffff88821fd6bd80 RBX: ffff888027898000 RCX: ccccccccccccd000
RDX: ffff88821fca8d80 RSI: ffff888000000000 RDI: 00000000000004a0
RBP: ffff888081a0fc80 R08: 0000000000000002 R09: ffff888081a0fb08
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001
R13: ffff88811130e388 R14: ffff88811130da00 R15: ffff88812fdb7d80
 finish_task_switch+0xfc/0x2d0 kernel/sched/core.c:2698
 context_switch kernel/sched/core.c:2851 [inline]
 __schedule+0x6cc/0x800 kernel/sched/core.c:3491
 schedule+0x15b/0x240 kernel/sched/core.c:3535
 freezable_schedule include/linux/freezer.h:172 [inline]
 do_nanosleep+0x2ba/0x980 kernel/time/hrtimer.c:1679
 hrtimer_nanosleep kernel/time/hrtimer.c:1733 [inline]
 __do_sys_nanosleep kernel/time/hrtimer.c:1767 [inline]
 __se_sys_nanosleep+0x746/0x960 kernel/time/hrtimer.c:1754
 __x64_sys_nanosleep+0x3e/0x60 kernel/time/hrtimer.c:1754
 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
 entry_SYSCALL_64_after_hwframe+0x63/0xe7
RIP: 0033:0x4855a0
Code: 00 00 48 c7 c0 d4 ff ff ff 64 c7 00 16 00 00 00 31 c0 eb be 66 0f 1f 44 00 00 83 3d b1 11 5d 00 00 75 14 b8 23 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 04 e2 f8 ff c3 48 83 ec 08 e8 3a 55 fd ff
RSP: 002b:0000000000a4fd58 EFLAGS: 00000246 ORIG_RAX: 0000000000000023
RAX: ffffffffffffffda RBX: 0000000000085780 RCX: 00000000004855a0
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000a4fd60
RBP: 00000000000007ec R08: 0000000000000001 R09: 0000000000ceb940
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000008
R13: 0000000000a4fdb0 R14: 0000000000085711 R15: 0000000000a4fdc0

Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 [inline]
 kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159
 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176
 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185
 slab_post_alloc_hook mm/slab.h:445 [inline]
 slab_alloc_node mm/slub.c:2773 [inline]
 __kmalloc_node_track_caller+0xe9e/0xff0 mm/slub.c:4398
 __kmalloc_reserve net/core/skbuff.c:140 [inline]
 __alloc_skb+0x309/0xa20 net/core/skbuff.c:208
 alloc_skb include/linux/skbuff.h:1012 [inline]
 alloc_skb_with_frags+0x186/0xa60 net/core/skbuff.c:5287
 sock_alloc_send_pskb+0xafd/0x10a0 net/core/sock.c:2091
 sock_alloc_send_skb+0xca/0xe0 net/core/sock.c:2108
 __ip_append_data+0x34cd/0x5000 net/ipv4/ip_output.c:998
 ip_append_data+0x324/0x480 net/ipv4/ip_output.c:1220
 icmp_push_reply+0x23d/0x7e0 net/ipv4/icmp.c:375
 __icmp_send+0x2ea3/0x30f0 net/ipv4/icmp.c:737
 icmp_send include/net/icmp.h:47 [inline]
 ipv4_link_failure+0x6d/0x230 net/ipv4/route.c:1190
 dst_link_failure include/net/dst.h:427 [inline]
 arp_error_report+0x106/0x1a0 net/ipv4/arp.c:297
 neigh_invalidate+0x359/0x8e0 net/core/neighbour.c:992
 neigh_timer_handler+0xdf2/0x1280 net/core/neighbour.c:1078
 call_timer_fn+0x285/0x600 kernel/time/timer.c:1325
 expire_timers kernel/time/timer.c:1362 [inline]
 __run_timers+0xdb4/0x11d0 kernel/time/timer.c:1681
 run_timer_softirq+0x2e/0x50 kernel/time/timer.c:1694
 __do_softirq+0x53f/0x93a kernel/softirq.c:293

Fixes: 26fc181e6cac ("fou, fou6: do not assume linear skbs")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Stefano Brivio <sbrivio@redhat.com>
Cc: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fou.c  | 4 ++--
 net/ipv6/fou6.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 437070d1ffb1..79e98e21cdd7 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1024,7 +1024,7 @@ static int gue_err(struct sk_buff *skb, u32 info)
 	int ret;
 
 	len = sizeof(struct udphdr) + sizeof(struct guehdr);
-	if (!pskb_may_pull(skb, len))
+	if (!pskb_may_pull(skb, transport_offset + len))
 		return -EINVAL;
 
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
@@ -1059,7 +1059,7 @@ static int gue_err(struct sk_buff *skb, u32 info)
 
 	optlen = guehdr->hlen << 2;
 
-	if (!pskb_may_pull(skb, len + optlen))
+	if (!pskb_may_pull(skb, transport_offset + len + optlen))
 		return -EINVAL;
 
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 867474abe269..ec4e2ed95f36 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -94,7 +94,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	int ret;
 
 	len = sizeof(struct udphdr) + sizeof(struct guehdr);
-	if (!pskb_may_pull(skb, len))
+	if (!pskb_may_pull(skb, transport_offset + len))
 		return -EINVAL;
 
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
@@ -129,7 +129,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	optlen = guehdr->hlen << 2;
 
-	if (!pskb_may_pull(skb, len + optlen))
+	if (!pskb_may_pull(skb, transport_offset + len + optlen))
 		return -EINVAL;
 
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

From 580411d07ce780c2dd721837ec1e42e30c5a58ee Mon Sep 17 00:00:00 2001
From: Matthew Whitehead <tedheadster@gmail.com>
Date: Wed, 6 Mar 2019 14:41:27 -0500
Subject: [PATCH 60/72] 8139too : Add support for U.S. Robotics USR997901A
 10/100 Cardbus NIC

Add PCI vendor and device identifier for U.S. Robotics USR997901A
10/100 Cardbus NIC. Tested on real hardware.

Signed-off-by: Matthew Whitehead <tedheadster@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139too.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 69d752f0b621..55d01266e615 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -258,6 +258,7 @@ static const struct pci_device_id rtl8139_pci_tbl[] = {
 	{0x126c, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 	{0x1743, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 	{0x021b, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
+	{0x16ec, 0xab06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 
 #ifdef CONFIG_SH_SECUREEDGE5410
 	/* Bogus 8139 silicon reports 8129 without external PROM :-( */

From 09073525f8b9095931e5a82209b61abc7ce4c9d5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 16:29:33 -0700
Subject: [PATCH 61/72] net: ethernet: sun: Zero initialize class in default
 case in niu_add_ethtool_tcam_entry

When building with -Wsometimes-uninitialized, Clang warns:

drivers/net/ethernet/sun/niu.c:7466:5: warning: variable 'class' is used
uninitialized whenever switch default is taken
[-Wsometimes-uninitialized]

The default case can never happen because i can only be 0 to 3
(NIU_L3_PROG_CLS is defined as 4). To make this clear to Clang,
just zero initialize class in the default case (use the macro
CLASS_CODE_UNRECOG to make it clear this shouldn't happen).

Link: https://github.com/ClangBuiltLinux/linux/issues/403
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/niu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index d84501441edd..6f99437a6962 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -7464,6 +7464,7 @@ static int niu_add_ethtool_tcam_entry(struct niu *np,
 					class = CLASS_CODE_USER_PROG4;
 					break;
 				default:
+					class = CLASS_CODE_UNRECOG;
 					break;
 				}
 				ret = tcam_user_ip_class_set(np, class, 0,

From 9d3e1368bb45893a75a5dfb7cd21fdebfa6b47af Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 8 Mar 2019 22:09:47 +0100
Subject: [PATCH 62/72] tcp: handle inet_csk_reqsk_queue_add() failures

Commit 7716682cc58e ("tcp/dccp: fix another race at listener
dismantle") let inet_csk_reqsk_queue_add() fail, and adjusted
{tcp,dccp}_check_req() accordingly. However, TFO and syncookies
weren't modified, thus leaking allocated resources on error.

Contrary to tcp_check_req(), in both syncookies and TFO cases,
we need to drop the request socket. Also, since the child socket is
created with inet_csk_clone_lock(), we have to unlock it and drop an
extra reference (->sk_refcount is initially set to 2 and
inet_csk_reqsk_queue_add() drops only one ref).

For TFO, we also need to revert the work done by tcp_try_fastopen()
(with reqsk_fastopen_remove()).

Fixes: 7716682cc58e ("tcp/dccp: fix another race at listener dismantle")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 7 ++++++-
 net/ipv4/tcp_input.c  | 8 +++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 606f868d9f3f..e531344611a0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -216,7 +216,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 		refcount_set(&req->rsk_refcnt, 1);
 		tcp_sk(child)->tsoffset = tsoff;
 		sock_rps_save_rxhash(child, skb);
-		inet_csk_reqsk_queue_add(sk, req, child);
+		if (!inet_csk_reqsk_queue_add(sk, req, child)) {
+			bh_unlock_sock(child);
+			sock_put(child);
+			child = NULL;
+			reqsk_put(req);
+		}
 	} else {
 		reqsk_free(req);
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4eb0c8ca3c60..5def3c48870e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6498,7 +6498,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
 				    &foc, TCP_SYNACK_FASTOPEN);
 		/* Add the child socket directly into the accept queue */
-		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+		if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
+			reqsk_fastopen_remove(fastopen_sk, req, false);
+			bh_unlock_sock(fastopen_sk);
+			sock_put(fastopen_sk);
+			reqsk_put(req);
+			goto drop;
+		}
 		sk->sk_data_ready(sk);
 		bh_unlock_sock(fastopen_sk);
 		sock_put(fastopen_sk);

From 69ffaebb90369ce08657b5aea4896777b9d6e8fc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 9 Mar 2019 00:29:58 +0000
Subject: [PATCH 63/72] rxrpc: Fix client call queueing, waiting for channel

rxrpc_get_client_conn() adds a new call to the front of the waiting_calls
queue if the connection it's going to use already exists.  This is bad as
it allows calls to get starved out.

Fix this by adding to the tail instead.

Also change the other enqueue point in the same function to put it on the
front (ie. when we have a new connection).  This makes the point that in
the case of a new connection the new call goes at the front (though it
doesn't actually matter since the queue should be unoccupied).

Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/conn_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index f307a05076e1..83797b3949e2 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -353,7 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
 	 * normally have to take channel_lock but we do this before anyone else
 	 * can see the connection.
 	 */
-	list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+	list_add(&call->chan_wait_link, &candidate->waiting_calls);
 
 	if (cp->exclusive) {
 		call->conn = candidate;
@@ -432,7 +432,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
 	call->conn = conn;
 	call->security_ix = conn->security_ix;
 	call->service_id = conn->service_id;
-	list_add(&call->chan_wait_link, &conn->waiting_calls);
+	list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
 	spin_unlock(&conn->channel_lock);
 	_leave(" = 0 [extant %d]", conn->debug_id);
 	return 0;

From 7cbbee050c959f41b512599bafd99685f419ce26 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 8 Mar 2019 01:21:27 +0100
Subject: [PATCH 64/72] net: dsa: mv88e6xxx: Set correct interface mode for
 CPU/DSA ports

By default, the switch driver is expected to configure CPU and DSA
ports to their maximum speed. For the 6341 and 6390 families, the
ports interface mode has to be configured as well. The 6390X range
support 10G ports using XAUI, while the 6341 and 6390 supports
2500BaseX, as their maximum speed.

Fixes: 787799a9d555 ("net: dsa: mv88e6xxx: Default ports 9/10 6390X CMODE to 1000BaseX")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 11 +++++++++++
 drivers/net/dsa/mv88e6xxx/chip.h |  3 +++
 drivers/net/dsa/mv88e6xxx/port.c | 24 ++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/port.h |  4 ++++
 4 files changed, 42 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 96728d1e9824..f4e2db44ad91 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -569,6 +569,9 @@ int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link,
 			goto restore_link;
 	}
 
+	if (speed == SPEED_MAX && chip->info->ops->port_max_speed_mode)
+		mode = chip->info->ops->port_max_speed_mode(port);
+
 	if (chip->info->ops->port_set_pause) {
 		err = chip->info->ops->port_set_pause(chip, port, pause);
 		if (err)
@@ -3067,6 +3070,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6341_port_set_speed,
+	.port_max_speed_mode = mv88e6341_port_max_speed_mode,
 	.port_tag_remap = mv88e6095_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3385,6 +3389,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6390_port_set_speed,
+	.port_max_speed_mode = mv88e6390_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3429,6 +3434,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6390x_port_set_speed,
+	.port_max_speed_mode = mv88e6390x_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3473,6 +3479,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6390_port_set_speed,
+	.port_max_speed_mode = mv88e6390_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3566,6 +3573,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6390_port_set_speed,
+	.port_max_speed_mode = mv88e6390_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3697,6 +3705,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6341_port_set_speed,
+	.port_max_speed_mode = mv88e6341_port_max_speed_mode,
 	.port_tag_remap = mv88e6095_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3872,6 +3881,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6390_port_set_speed,
+	.port_max_speed_mode = mv88e6390_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3920,6 +3930,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.port_set_duplex = mv88e6xxx_port_set_duplex,
 	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
 	.port_set_speed = mv88e6390x_port_set_speed,
+	.port_max_speed_mode = mv88e6390x_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index adcf60779895..19c07dff0440 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -377,6 +377,9 @@ struct mv88e6xxx_ops {
 	 */
 	int (*port_set_speed)(struct mv88e6xxx_chip *chip, int port, int speed);
 
+	/* What interface mode should be used for maximum speed? */
+	phy_interface_t (*port_max_speed_mode)(int port);
+
 	int (*port_tag_remap)(struct mv88e6xxx_chip *chip, int port);
 
 	int (*port_set_frame_mode)(struct mv88e6xxx_chip *chip, int port,
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 0796c6feec55..dce84a2a65c7 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -312,6 +312,14 @@ int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
 	return mv88e6xxx_port_set_speed(chip, port, speed, !port, true);
 }
 
+phy_interface_t mv88e6341_port_max_speed_mode(int port)
+{
+	if (port == 5)
+		return PHY_INTERFACE_MODE_2500BASEX;
+
+	return PHY_INTERFACE_MODE_NA;
+}
+
 /* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */
 int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
 {
@@ -345,6 +353,14 @@ int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
 	return mv88e6xxx_port_set_speed(chip, port, speed, true, true);
 }
 
+phy_interface_t mv88e6390_port_max_speed_mode(int port)
+{
+	if (port == 9 || port == 10)
+		return PHY_INTERFACE_MODE_2500BASEX;
+
+	return PHY_INTERFACE_MODE_NA;
+}
+
 /* Support 10, 100, 200, 1000, 2500, 10000 Mbps (e.g. 88E6190X) */
 int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
 {
@@ -360,6 +376,14 @@ int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
 	return mv88e6xxx_port_set_speed(chip, port, speed, true, true);
 }
 
+phy_interface_t mv88e6390x_port_max_speed_mode(int port)
+{
+	if (port == 9 || port == 10)
+		return PHY_INTERFACE_MODE_XAUI;
+
+	return PHY_INTERFACE_MODE_NA;
+}
+
 int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 			      phy_interface_t mode)
 {
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 4aadf321edb7..c7bed263a0f4 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -285,6 +285,10 @@ int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
 int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
 int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
 
+phy_interface_t mv88e6341_port_max_speed_mode(int port);
+phy_interface_t mv88e6390_port_max_speed_mode(int port);
+phy_interface_t mv88e6390x_port_max_speed_mode(int port);
+
 int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state);
 
 int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map);

From 1f5d861f7fefa971b2c6e766f77932c86419a319 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Mar 2019 21:02:39 -0700
Subject: [PATCH 65/72] net: stmmac: Avoid one more sometimes uninitialized
 Clang warning

When building with -Wsometimes-uninitialized, Clang warns:

drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c:111:2: error: variable
'ns' is used uninitialized whenever 'if' condition is false
[-Werror,-Wsometimes-uninitialized]
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c:111:2: error: variable
'ns' is used uninitialized whenever '&&' condition is false
[-Werror,-Wsometimes-uninitialized]

Clang is concerned with the use of stmmac_do_void_callback (which
stmmac_get_systime wraps), as it may fail to initialize these values if
the if condition was ever false (meaning the callback doesn't exist).
It's not wrong because the callback is what initializes ns. While it's
unlikely that the callback is going to disappear at some point and make
that condition false, we can easily avoid this warning by zero
initializing the variable.

Link: https://github.com/ClangBuiltLinux/linux/issues/384
Fixes: df103170854e ("net: stmmac: Avoid sometimes uninitialized Clang warnings")
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 2293e21f789f..cc60b3fb0892 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -105,7 +105,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	struct stmmac_priv *priv =
 	    container_of(ptp, struct stmmac_priv, ptp_clock_ops);
 	unsigned long flags;
-	u64 ns;
+	u64 ns = 0;
 
 	spin_lock_irqsave(&priv->ptp_lock, flags);
 	stmmac_get_systime(priv, priv->ptpaddr, &ns);

From 1039c6e1936ef2be0f342bd56fcf0fb25c1df5fe Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 9 Mar 2019 10:26:53 +0100
Subject: [PATCH 66/72] net: keep refcount warning in reqsk_free()

As Eric Dumazet said, "We do not have a way to tell if the req was ever
inserted in a hash table, so better play safe.".
Let's remove this comment, so that nobody will be tempted to drop the
WARN_ON_ONCE() line.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 347015515a7d..21a5243fecd1 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -108,7 +108,6 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
 
 static inline void reqsk_free(struct request_sock *req)
 {
-	/* temporary debugging */
 	WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
 
 	req->rsk_ops->destructor(req);

From 083b78a9ed64bc71957dd7da866c128a307ea062 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 9 Mar 2019 14:43:38 -0800
Subject: [PATCH 67/72] ip: fix ip_mc_may_pull() return value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ip_mc_may_pull() must return 0 if there is a problem, not an errno.

syzbot reported :

BUG: KASAN: use-after-free in br_ip4_multicast_igmp3_report net/bridge/br_multicast.c:947 [inline]
BUG: KASAN: use-after-free in br_multicast_ipv4_rcv net/bridge/br_multicast.c:1631 [inline]
BUG: KASAN: use-after-free in br_multicast_rcv+0x3cd8/0x4440 net/bridge/br_multicast.c:1741
Read of size 4 at addr ffff88820a4084ee by task syz-executor.2/11183

CPU: 1 PID: 11183 Comm: syz-executor.2 Not tainted 5.0.0+ #14
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131
 br_ip4_multicast_igmp3_report net/bridge/br_multicast.c:947 [inline]
 br_multicast_ipv4_rcv net/bridge/br_multicast.c:1631 [inline]
 br_multicast_rcv+0x3cd8/0x4440 net/bridge/br_multicast.c:1741
 br_handle_frame_finish+0xa3a/0x14c0 net/bridge/br_input.c:108
 br_nf_hook_thresh+0x2ec/0x380 net/bridge/br_netfilter_hooks.c:1005
 br_nf_pre_routing_finish+0x8e2/0x1750 net/bridge/br_netfilter_hooks.c:410
 NF_HOOK include/linux/netfilter.h:289 [inline]
 NF_HOOK include/linux/netfilter.h:283 [inline]
 br_nf_pre_routing+0x7e7/0x13a0 net/bridge/br_netfilter_hooks.c:506
 nf_hook_entry_hookfn include/linux/netfilter.h:119 [inline]
 nf_hook_slow+0xbf/0x1f0 net/netfilter/core.c:511
 nf_hook include/linux/netfilter.h:244 [inline]
 NF_HOOK include/linux/netfilter.h:287 [inline]
 br_handle_frame+0x95b/0x1450 net/bridge/br_input.c:305
 __netif_receive_skb_core+0xa96/0x3040 net/core/dev.c:4902
 __netif_receive_skb_one_core+0xa8/0x1a0 net/core/dev.c:4971
 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5083
 netif_receive_skb_internal+0x117/0x660 net/core/dev.c:5186
 netif_receive_skb+0x6e/0x5a0 net/core/dev.c:5261

Fixes: ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index cc85f4524dbf..9c94b2ea789c 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -110,7 +110,7 @@ struct ip_mc_list {
 static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len)
 {
 	if (skb_transport_offset(skb) + ip_transport_len(skb) < len)
-		return -EINVAL;
+		return 0;
 
 	return pskb_may_pull(skb, len);
 }

From 69b51bbb03f73e04c486f79d1556b2d9becf4dbc Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Sun, 10 Mar 2019 14:47:51 +0800
Subject: [PATCH 68/72] net: hns3: fix to stop multiple HNS reset due to the
 AER changes

The commit bfcb79fca19d
("PCI/ERR: Run error recovery callbacks for all affected devices")
affected the non-fatal error recovery logic for the HNS and RDMA devices.
This is because each HNS PF under PCIe bus receive callbacks
from the AER driver when an error is reported for one of the PF.
This causes unwanted PF resets because
the HNS decides which PF to reset based on the reset type set.
The HNS error handling code sets the reset type based on the hw error
type detected.

This patch provides fix for the above issue for the recovery of
the hw errors in the HNS and RDMA devices.

This patch needs backporting to the kernel v5.0+

Fixes: 332fbf576579 ("net: hns3: add handling of hw ras errors using new set of commands")
Reported-by: Xiaofei Tan <tanxiaofei@huawei.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h            | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c        | 4 +++-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 9 +++++++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 66d7a8b80e76..38b430f11fc1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -194,6 +194,7 @@ struct hnae3_ae_dev {
 	const struct hnae3_ae_ops *ops;
 	struct list_head node;
 	u32 flag;
+	u8 override_pci_need_reset; /* fix to stop multiple reset happening */
 	enum hnae3_dev_type dev_type;
 	enum hnae3_reset_type reset_type;
 	void *priv;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0d1ae15a5927..1c1f17ec6be2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1850,7 +1850,9 @@ static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev)
 
 	/* request the reset */
 	if (ae_dev->ops->reset_event) {
-		ae_dev->ops->reset_event(pdev, NULL);
+		if (!ae_dev->override_pci_need_reset)
+			ae_dev->ops->reset_event(pdev, NULL);
+
 		return PCI_ERS_RESULT_RECOVERED;
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 1feceff1477c..1f52d11f77b5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1317,8 +1317,10 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 		hclge_handle_all_ras_errors(hdev);
 	} else {
 		if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
-		    hdev->pdev->revision < 0x21)
+		    hdev->pdev->revision < 0x21) {
+			ae_dev->override_pci_need_reset = 1;
 			return PCI_ERS_RESULT_RECOVERED;
+		}
 	}
 
 	if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
@@ -1327,8 +1329,11 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 	}
 
 	if (status & HCLGE_RAS_REG_NFE_MASK ||
-	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
+		ae_dev->override_pci_need_reset = 0;
 		return PCI_ERS_RESULT_NEED_RESET;
+	}
+	ae_dev->override_pci_need_reset = 1;
 
 	return PCI_ERS_RESULT_RECOVERED;
 }

From d721fe99f6ada070ae8fc0ec3e01ce5a42def0d9 Mon Sep 17 00:00:00 2001
From: Kangjie Lu <kjlu@umn.edu>
Date: Fri, 8 Mar 2019 23:49:34 -0600
Subject: [PATCH 69/72] isdn: mISDNinfineon: fix potential NULL pointer
 dereference

In case ioremap fails, the fix returns -ENOMEM to avoid NULL
pointer dereference.

Signed-off-by: Kangjie Lu <kjlu@umn.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/mISDNinfineon.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index 3e01012be4ab..0fe6ddcb3fdc 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -712,8 +712,11 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->addr.start, (ulong)hw->addr.size);
 			return err;
 		}
-		if (hw->ci->addr_mode == AM_MEMIO)
+		if (hw->ci->addr_mode == AM_MEMIO) {
 			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
+			if (unlikely(!hw->addr.p))
+				return -ENOMEM;
+		}
 		hw->addr.mode = hw->ci->addr_mode;
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n",

From 95d6ebd53c79522bf9502dbc7e89e0d63f94dae4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Mar 2019 09:07:14 -0700
Subject: [PATCH 70/72] net/x25: fix use-after-free in x25_device_event()

In case of failure x25_connect() does a x25_neigh_put(x25->neighbour)
but forgets to clear x25->neighbour pointer, thus triggering use-after-free.

Since the socket is visible in x25_list, we need to hold x25_list_lock
to protect the operation.

syzbot report :

BUG: KASAN: use-after-free in x25_kill_by_device net/x25/af_x25.c:217 [inline]
BUG: KASAN: use-after-free in x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252
Read of size 8 at addr ffff8880a030edd0 by task syz-executor003/7854

CPU: 0 PID: 7854 Comm: syz-executor003 Not tainted 5.0.0+ #97
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135
 x25_kill_by_device net/x25/af_x25.c:217 [inline]
 x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252
 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
 call_netdevice_notifiers net/core/dev.c:1765 [inline]
 __dev_notify_flags+0x1e9/0x2c0 net/core/dev.c:7607
 dev_change_flags+0x10d/0x170 net/core/dev.c:7643
 dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237
 dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488
 sock_do_ioctl+0x1bd/0x300 net/socket.c:995
 sock_ioctl+0x32b/0x610 net/socket.c:1096
 vfs_ioctl fs/ioctl.c:46 [inline]
 file_ioctl fs/ioctl.c:509 [inline]
 do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696
 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713
 __do_sys_ioctl fs/ioctl.c:720 [inline]
 __se_sys_ioctl fs/ioctl.c:718 [inline]
 __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4467c9
Code: e8 0c e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 5b 07 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fdbea222d98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00000000006dbc58 RCX: 00000000004467c9
RDX: 0000000020000340 RSI: 0000000000008914 RDI: 0000000000000003
RBP: 00000000006dbc50 R08: 00007fdbea223700 R09: 0000000000000000
R10: 00007fdbea223700 R11: 0000000000000246 R12: 00000000006dbc5c
R13: 6000030030626669 R14: 0000000000000000 R15: 0000000030626669

Allocated by task 7843:
 save_stack+0x45/0xd0 mm/kasan/common.c:73
 set_track mm/kasan/common.c:85 [inline]
 __kasan_kmalloc mm/kasan/common.c:495 [inline]
 __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:468
 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:509
 kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3615
 kmalloc include/linux/slab.h:545 [inline]
 x25_link_device_up+0x46/0x3f0 net/x25/x25_link.c:249
 x25_device_event+0x116/0x2b0 net/x25/af_x25.c:242
 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739
 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline]
 call_netdevice_notifiers net/core/dev.c:1765 [inline]
 __dev_notify_flags+0x121/0x2c0 net/core/dev.c:7605
 dev_change_flags+0x10d/0x170 net/core/dev.c:7643
 dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237
 dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488
 sock_do_ioctl+0x1bd/0x300 net/socket.c:995
 sock_ioctl+0x32b/0x610 net/socket.c:1096
 vfs_ioctl fs/ioctl.c:46 [inline]
 file_ioctl fs/ioctl.c:509 [inline]
 do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696
 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713
 __do_sys_ioctl fs/ioctl.c:720 [inline]
 __se_sys_ioctl fs/ioctl.c:718 [inline]
 __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 7865:
 save_stack+0x45/0xd0 mm/kasan/common.c:73
 set_track mm/kasan/common.c:85 [inline]
 __kasan_slab_free+0x102/0x150 mm/kasan/common.c:457
 kasan_slab_free+0xe/0x10 mm/kasan/common.c:465
 __cache_free mm/slab.c:3494 [inline]
 kfree+0xcf/0x230 mm/slab.c:3811
 x25_neigh_put include/net/x25.h:253 [inline]
 x25_connect+0x8d8/0xde0 net/x25/af_x25.c:824
 __sys_connect+0x266/0x330 net/socket.c:1685
 __do_sys_connect net/socket.c:1696 [inline]
 __se_sys_connect net/socket.c:1693 [inline]
 __x64_sys_connect+0x73/0xb0 net/socket.c:1693
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff8880a030edc0
 which belongs to the cache kmalloc-256 of size 256
The buggy address is located 16 bytes inside of
 256-byte region [ffff8880a030edc0, ffff8880a030eec0)
The buggy address belongs to the page:
page:ffffea000280c380 count:1 mapcount:0 mapping:ffff88812c3f07c0 index:0x0
flags: 0x1fffc0000000200(slab)
raw: 01fffc0000000200 ffffea0002806788 ffffea00027f0188 ffff88812c3f07c0
raw: 0000000000000000 ffff8880a030e000 000000010000000c 0000000000000000
page dumped because: kasan: bad access detected

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+04babcefcd396fabec37@syzkaller.appspotmail.com
Cc: andrew hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index eff31348e20b..27171ac6fe3b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -820,8 +820,12 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
 	sock->state = SS_CONNECTED;
 	rc = 0;
 out_put_neigh:
-	if (rc)
+	if (rc) {
+		read_lock_bh(&x25_list_lock);
 		x25_neigh_put(x25->neighbour);
+		x25->neighbour = NULL;
+		read_unlock_bh(&x25_list_lock);
+	}
 out_put_route:
 	x25_route_put(rt);
 out:

From 59cbf56fcd98ba2a715b6e97c4e43f773f956393 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Mar 2019 10:36:40 -0700
Subject: [PATCH 71/72] vxlan: test dev->flags & IFF_UP before calling
 gro_cells_receive()

Same reasons than the ones explained in commit 4179cb5a4c92
("vxlan: test dev->flags & IFF_UP before calling netif_rx()")

netif_rx() or gro_cells_receive() must be called under a strict contract.

At device dismantle phase, core networking clears IFF_UP
and flush_all_backlogs() is called after rcu grace period
to make sure no incoming packet might be in a cpu backlog
and still referencing the device.

A similar protocol is used for gro_cells infrastructure, as
gro_cells_destroy() will be called only after a full rcu
grace period is observed after IFF_UP has been cleared.

Most drivers call netif_rx() from their interrupt handler,
and since the interrupts are disabled at device dismantle,
netif_rx() does not have to check dev->flags & IFF_UP

Virtual drivers do not have this guarantee, and must
therefore make the check themselves.

Otherwise we risk use-after-free and/or crashes.

Fixes: d342894c5d2f ("vxlan: virtual extensible lan")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 7610c51b33a0..077f1b9f2761 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1731,6 +1731,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
+	rcu_read_lock();
+
+	if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
+		rcu_read_unlock();
+		atomic_long_inc(&vxlan->dev->rx_dropped);
+		goto drop;
+	}
+
 	stats = this_cpu_ptr(vxlan->dev->tstats);
 	u64_stats_update_begin(&stats->syncp);
 	stats->rx_packets++;
@@ -1738,6 +1746,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 	u64_stats_update_end(&stats->syncp);
 
 	gro_cells_receive(&vxlan->gro_cells, skb);
+
+	rcu_read_unlock();
+
 	return 0;
 
 drop:

From 2a5ff07a0eb945f291e361aa6f6becca8340ba46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Mar 2019 10:39:37 -0700
Subject: [PATCH 72/72] gro_cells: make sure device is up in
 gro_cells_receive()

We keep receiving syzbot reports [1] that show that tunnels do not play
the rcu/IFF_UP rules properly.

At device dismantle phase, gro_cells_destroy() will be called
only after a full rcu grace period is observed after IFF_UP
has been cleared.

This means that IFF_UP needs to be tested before queueing packets
into netif_rx() or gro_cells.

This patch implements the test in gro_cells_receive() because
too many callers do not seem to bother enough.

[1]
BUG: unable to handle kernel paging request at fffff4ca0b9ffffe
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.0.0+ #97
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: netns cleanup_net
RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline]
RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline]
RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline]
RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline]
RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78
Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03
RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02
RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe
RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0
RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645
R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000
R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75
kobject: 'loop2' (000000004bd7d84a): kobject_uevent_env
FS:  0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0
Call Trace:
kobject: 'loop2' (000000004bd7d84a): fill_kobj_path: path = '/devices/virtual/block/loop2'
 ip_tunnel_dev_free+0x19/0x60 net/ipv4/ip_tunnel.c:1010
 netdev_run_todo+0x51c/0x7d0 net/core/dev.c:8970
 rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:116
 ip_tunnel_delete_nets+0x423/0x5f0 net/ipv4/ip_tunnel.c:1124
 vti_exit_batch_net+0x23/0x30 net/ipv4/ip_vti.c:495
 ops_exit_list.isra.0+0x105/0x160 net/core/net_namespace.c:156
 cleanup_net+0x3fb/0x960 net/core/net_namespace.c:551
 process_one_work+0x98e/0x1790 kernel/workqueue.c:2173
 worker_thread+0x98/0xe40 kernel/workqueue.c:2319
 kthread+0x357/0x430 kernel/kthread.c:246
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352
Modules linked in:
CR2: fffff4ca0b9ffffe
   [ end trace 513fc9c1338d1cb3 ]
RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline]
RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline]
RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline]
RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline]
RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78
Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03
RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02
RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe
RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0
RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645
R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000
kobject: 'loop3' (00000000e4ee57a6): kobject_uevent_env
R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75
FS:  0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0

Fixes: c9e6bc644e55 ("net: add gro_cells infrastructure")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gro_cells.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index acf45ddbe924..e095fb871d91 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	struct gro_cell *cell;
+	int res;
 
-	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
-		return netif_rx(skb);
+	rcu_read_lock();
+	if (unlikely(!(dev->flags & IFF_UP)))
+		goto drop;
+
+	if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) {
+		res = netif_rx(skb);
+		goto unlock;
+	}
 
 	cell = this_cpu_ptr(gcells->cells);
 
 	if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+drop:
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
-		return NET_RX_DROP;
+		res = NET_RX_DROP;
+		goto unlock;
 	}
 
 	__skb_queue_tail(&cell->napi_skbs, skb);
 	if (skb_queue_len(&cell->napi_skbs) == 1)
 		napi_schedule(&cell->napi);
-	return NET_RX_SUCCESS;
+
+	res = NET_RX_SUCCESS;
+
+unlock:
+	rcu_read_unlock();
+	return res;
 }
 EXPORT_SYMBOL(gro_cells_receive);