From 2d45a02d0166caf2627fe91897c6ffc3b19514c4 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 12 Jun 2015 10:16:41 -0300 Subject: [PATCH 01/18] sctp: fix ASCONF list handling ->auto_asconf_splist is per namespace and mangled by functions like sctp_setsockopt_auto_asconf(), which don't guarantee any serialization. Also, the call to inet_sk_copy_descendant() was backing up ->auto_asconf_list through the copy but was not honoring ->do_auto_asconf, which could lead to list corruption if it differed between the two sockets. This commit thus fixes the list handling by using the ->addr_wq_lock spinlock to protect the list. Special handling is done upon socket creation and destruction for that. Error handling: sctp_init_sock() will never return an error after having initialized asconf, so sctp_destroy_sock() can be called without addr_wq_lock. The lock is now taken in sctp_close(), before locking the socket, so we don't do it in inverse order compared to sctp_addr_wq_timeout_handler(). Instead of taking the lock in sctp_sock_migrate() for copying and restoring the list values, it's preferred to avoid rewriting it by implementing sctp_copy_descendant(). The issue was found with a test application that kept flipping sysctl default_auto_asconf on and off, but one could trigger it by issuing simultaneous setsockopt() calls on multiple sockets or by creating/destroying sockets fast enough. This is only triggerable locally. Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).") Reported-by: Ji Jianwen Suggested-by: Neil Horman Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 1 + include/net/sctp/structs.h | 4 ++++ net/sctp/socket.c | 43 ++++++++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 3573a81815ad..8ba379f9e467 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -31,6 +31,7 @@ struct netns_sctp { struct list_head addr_waitq; struct timer_list addr_wq_timer; struct list_head auto_asconf_splist; + /* Lock that protects both addr_waitq and auto_asconf_splist */ spinlock_t addr_wq_lock; /* Lock that protects the local_addr_list writers */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2bb2fcf5b11f..495c87e367b3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -223,6 +223,10 @@ struct sctp_sock { atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; + + /* These must be the last fields, as they will skipped on copies, * like on accept and peeloff operations */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f09de7fac2e6..5f6c4e61325b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1528,8 +1528,10 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. + * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. 
*/ - local_bh_disable(); + spin_lock_bh(&net->sctp.addr_wq_lock); bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() @@ -1539,7 +1541,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - local_bh_enable(); + spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); @@ -3580,6 +3582,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) return 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); if (val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; @@ -3588,6 +3591,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); return 0; } @@ -4121,18 +4125,28 @@ static int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(net, sk->sk_prot, 1); + + /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ if (net->sctp.default_auto_asconf) { + spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; - } else + spin_unlock(&sock_net(sk)->sctp.addr_wq_lock); + } else { sp->do_auto_asconf = 0; + } + local_bh_enable(); return 0; } -/* Cleanup any SCTP per socket resources. */ +/* Cleanup any SCTP per socket resources. Must be called with + * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true + */ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -7195,6 +7209,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_list = NULL; } +static inline void sctp_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + int ancestor_size = sizeof(struct inet_sock) + + sizeof(struct sctp_sock) - + offsetof(struct sctp_sock, auto_asconf_list); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); +} + /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ @@ -7209,7 +7236,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; - struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -7217,12 +7243,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - if (oldsp->do_auto_asconf) { - memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); - inet_sk_copy_descendant(newsk, oldsk); - memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); - } else - inet_sk_copy_descendant(newsk, oldsk); + sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. From ac0a72a3e6e8d817f60ce4d9a8f3b43dc256d847 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 14 Jun 2015 17:13:06 +0300 Subject: [PATCH 02/18] net/mlx4_core: Disable Granular QoS per VF under IB/Eth VPI configuration Due to firmware bug, under VPI configuration when port1 = IB and port2 = Eth, Granular QoS per VF isn't working properly. 
More over, the whole QP0/QP1 Para-Virtualization in the mlx4 IB driver is broken on that config. Hence, we must disable Granular QoS per VF under that configuration till a fix is introduced. Once that happens, a new device capability will be used to mark the feature support on that specific configuration. Reported-by: Doug Ledford Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index ced5ecab5aa7..68b5a5563af7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -479,6 +479,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } + if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) && + (port_type_array[0] == MLX4_PORT_TYPE_IB) && + (port_type_array[1] == MLX4_PORT_TYPE_ETH)) { + mlx4_warn(dev, + "Granular QoS per VF not supported with IB/Eth configuration\n"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP; + } + dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters); dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; From 2dab80a8b486f02222a69daca6859519e05781d9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 15 Jun 2015 20:28:51 +0300 Subject: [PATCH 03/18] bridge: fix br_stp_set_bridge_priority race conditions After the ->set() spinlocks were removed br_stp_set_bridge_priority was left running without any protection when used via sysfs. It can race with port add/del and could result in use-after-free cases and corrupted lists. Tested by running port add/del in a loop with stp enabled while setting priority in a loop, crashes are easily reproducible. The spinlocks around sysfs ->set() were removed in commit: 14f98f258f19 ("bridge: range check STP parameters") There's also a race condition in the netlink priority support that is fixed by this change, but it was introduced recently and the fixes tag covers it, just in case it's needed the commit is: af615762e972 ("bridge: add ageing_time, stp_state, priority over netlink") Signed-off-by: Nikolay Aleksandrov Fixes: 14f98f258f19 ("bridge: range check STP parameters") Signed-off-by: David S. 
Miller --- net/bridge/br_ioctl.c | 2 -- net/bridge/br_stp_if.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index a9a4a1b7863d..8d423bc649b9 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); br_stp_set_bridge_priority(br, args[1]); - spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_PORT_PRIORITY: diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 41146872c1b4..7832d07f48f6 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -243,12 +243,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) return true; } -/* called under bridge lock */ +/* Acquires and releases bridge lock */ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) { struct net_bridge_port *p; int wasroot; + spin_lock_bh(&br->lock); wasroot = br_is_root_bridge(br); list_for_each_entry(p, &br->port_list, list) { @@ -266,6 +267,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); + spin_unlock_bh(&br->lock); } /* called under bridge lock */ From f98f4514d07871da7a113dd9e3e330743fd70ae4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Jun 2015 07:59:11 -0700 Subject: [PATCH 04/18] packet: read num_members once in packet_rcv_fanout() We need to tell compiler it must not read f->num_members multiple times. Otherwise testing if num is not zero is flaky, and we could attempt an invalid divide by 0 in fanout_demux_cpu() Note bug was present in packet_rcv_fanout_hash() and packet_rcv_fanout_lb() but final 3.1 had a simple location after commit 95ec3eb417115fb ("packet: Add 'cpu' fanout policy.") Fixes: dc99f600698dc ("packet: Add fanout support.") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5989c6ee551..131545a06f05 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1353,7 +1353,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; - unsigned int num = f->num_members; + unsigned int num = READ_ONCE(f->num_members); struct packet_sock *po; unsigned int idx; From 2c51a97f76d20ebf1f50fef908b986cb051fdff9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 16 Jun 2015 22:56:39 +0300 Subject: [PATCH 05/18] neigh: do not modify unlinked entries The lockless lookups can return entry that is unlinked. Sometimes they get reference before last neigh_cleanup_and_release, sometimes they do not need reference. Later, any modification attempts may result in the following problems: 1. entry is not destroyed immediately because neigh_update can start the timer for dead entry, eg. on change to NUD_REACHABLE state. As result, entry lives for some time but is invisible and out of control. 2. __neigh_event_send can run in parallel with neigh_destroy while refcnt=0 but if timer is started and expired refcnt can reach 0 for second time leading to second neigh_destroy and possible crash. 
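A minimal userspace sketch of the pattern this patch introduces (struct, field and function names below are invented for illustration and are not the kernel code): every modification path re-checks a "dead" flag under the entry's lock and bails out, so an entry that a lockless lookup handed back after it was already unlinked can no longer be re-armed or freed twice.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
	pthread_mutex_t lock;
	bool dead;	/* set when the entry is unlinked from the table */
	int state;
};

/* Modifiers must re-check 'dead' under the lock: a lockless lookup may have
 * returned a pointer to an entry that was unlinked in the meantime. */
static int entry_update(struct entry *e, int new_state)
{
	int rc = 0;

	pthread_mutex_lock(&e->lock);
	if (e->dead) {		/* mirrors the new "if (neigh->dead)" checks */
		rc = -1;	/* refuse to modify or re-arm an unlinked entry */
		goto out;
	}
	e->state = new_state;
out:
	pthread_mutex_unlock(&e->lock);
	return rc;
}

int main(void)
{
	struct entry e = { .lock = PTHREAD_MUTEX_INITIALIZER, .dead = false };

	printf("update live entry: %d\n", entry_update(&e, 1));	/* 0  */
	e.dead = true;						/* pretend it was unlinked */
	printf("update dead entry: %d\n", entry_update(&e, 2));	/* -1 */
	return 0;
}

The kernel patch applies the same idea with neigh->lock and neigh->dead in the three write paths shown in the diff below.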
Thanks to Eric Dumazet and Ying Xue for their work and analysis on the __neigh_event_send change. Fixes: 767e97e1e0db ("neigh: RCU conversion of struct neighbour") Fixes: a263b3093641 ("ipv4: Make neigh lookups directly in output packet path.") Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().") Cc: Eric Dumazet Cc: Ying Xue Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3de654256028..2237c1b3cdd2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -957,6 +957,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; + if (neigh->dead) + goto out_dead; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + @@ -1013,6 +1015,13 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) write_unlock(&neigh->lock); local_bh_enable(); return rc; + +out_dead: + if (neigh->nud_state & NUD_STALE) + goto out_unlock_bh; + write_unlock_bh(&neigh->lock); + kfree_skb(skb); + return 1; } EXPORT_SYMBOL(__neigh_event_send); @@ -1076,6 +1085,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; + if (neigh->dead) + goto out; if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1225,6 +1236,8 @@ EXPORT_SYMBOL(neigh_update); */ void __neigh_set_probe_once(struct neighbour *neigh) { + if (neigh->dead) + return; neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; From 7b48f45797be7c4bdbb772de80efe6bdcd78e81d Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Wed, 17 Jun 2015 03:05:02 +0200 Subject: [PATCH 06/18] isdn: disable HiSax NetJet driver on microblaze arch Fix an allmodconfig compilation failure on microblaze due to big endian architectures being apparently unsupported by the NetJet code: drivers/isdn/hisax/nj_s.c: In function 'setup_netjet_s': drivers/isdn/hisax/nj_s.c:265:2: error: #error "not running on big endian machines now" Modify the relevant Kconfig such that the NetJet code is not built on microblaze anymore. Note that endianness on microblaze is not determined through Kconfig, but by means of a compiler-provided CPP macro, namely __MICROBLAZEEL__. However, gcc defaults to big endianness on that platform. Signed-off-by: Nicolai Stange Acked-by: Jean Delvare Signed-off-by: David S. 
Miller --- drivers/isdn/hisax/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index 97465ac5a2d5..eb83d94ab4fe 100644 --- a/drivers/isdn/hisax/Kconfig +++ b/drivers/isdn/hisax/Kconfig @@ -237,7 +237,7 @@ config HISAX_MIC config HISAX_NETJET bool "NETjet card" - depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN) || MICROBLAZE)) depends on VIRT_TO_BUS help This enables HiSax support for the NetJet from Traverse @@ -249,7 +249,7 @@ config HISAX_NETJET config HISAX_NETJET_U bool "NETspider U card" - depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN) || MICROBLAZE)) depends on VIRT_TO_BUS help This enables HiSax support for the Netspider U interface ISDN card From 36c01245eb8046c16eee6431e7dbfbb302635fa8 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 21 Jun 2015 18:50:44 +0200 Subject: [PATCH 07/18] can: fix loss of CAN frames in raw_rcv As reported by Manfred Schlaegl here http://marc.info/?l=linux-netdev&m=143482089824232&w=2 commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. As net timestamping is influenced by several players (netstamp_needed and netdev_tstamp_prequeue) Manfred missed a proper timestamp which leads to CAN frame loss. As skb timestamping became now mandatory for CAN related skbs this patch makes sure that received CAN skbs always have a proper timestamp set. Maybe there's a better solution in the future but this patch fixes the CAN frame loss so far. 
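A minimal userspace sketch of the guard added in the receive paths below (names invented, not the kernel API): a frame is stamped only if it does not carry a timestamp yet, so duplicate detection can always rely on the field; the allocation paths in the diff simply stamp unconditionally.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct frame {
	int64_t tstamp_ns;	/* 0 means "no timestamp yet", like skb->tstamp.tv64 == 0 */
	uint32_t can_id;
};

static void stamp_if_unset(struct frame *f)
{
	struct timespec ts;

	if (f->tstamp_ns)	/* keep a timestamp set earlier, e.g. by hardware */
		return;
	clock_gettime(CLOCK_REALTIME, &ts);
	f->tstamp_ns = (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	struct frame fresh = { .tstamp_ns = 0, .can_id = 0x123 };
	struct frame stamped = { .tstamp_ns = 42, .can_id = 0x456 };

	stamp_if_unset(&fresh);		/* gets the current time */
	stamp_if_unset(&stamped);	/* left untouched */
	printf("fresh=%lld stamped=%lld\n",
	       (long long)fresh.tstamp_ns, (long long)stamped.tstamp_ns);
	return 0;
}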
Reported-by: Manfred Schlaegl Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 5 +++++ drivers/net/can/slcan.c | 1 + drivers/net/can/vcan.c | 3 +++ net/can/af_can.c | 6 +++++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b0f69248cb71..e9b1810d319f 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,6 +440,9 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; + if (!(skb->tstamp.tv64)) + __net_timestamp(skb); + netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -575,6 +578,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; + __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -603,6 +607,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; + __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index c837eb91d43e..f64f5290d6f8 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,6 +207,7 @@ static void slc_bump(struct slcan *sl) if (!skb) return; + __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 674f367087c5..0ce868de855d 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,6 +78,9 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!(skb->tstamp.tv64)) + __net_timestamp(skb); + netif_rx_ni(skb); } diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc..689c818ed007 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -310,8 +310,12 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) + if (newskb) { + if (!(newskb->tstamp.tv64)) + __net_timestamp(newskb); + netif_rx_ni(newskb); + } /* update statistics */ can_stats.tx_frames++; From 51f458d9612177f69c2e2c437034ae15f93078e7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2015 13:54:54 +0200 Subject: [PATCH 08/18] mac80211: fix locking in update_vlan_tailroom_need_count() Unfortunately, Michal's change to fix AP_VLAN crypto tailroom caused a locking issue that was reported by lockdep, but only in a few cases - the issue was a classic ABBA deadlock caused by taking the mtx after the key_mtx, where normally they're taken the other way around. As the key mutex protects the field in question (I'm adding a few annotations to make that clear) only the iteration needs to be protected, but we can also iterate the interface list with just RCU protection while holding the key mutex. Fixes: f9dca80b98ca ("mac80211: fix AP_VLAN crypto tailroom calculation") Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- net/mac80211/key.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a907f2d5c12d..81e9785f38bc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) if (sdata->vif.type != NL80211_IFTYPE_AP) return; - mutex_lock(&sdata->local->mtx); + /* crypto_tx_tailroom_needed_cnt is protected by this */ + assert_key_lock(sdata->local); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_read_lock(); + + list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list) vlan->crypto_tx_tailroom_needed_cnt += delta; - mutex_unlock(&sdata->local->mtx); + rcu_read_unlock(); } static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) @@ -95,6 +98,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ + assert_key_lock(sdata->local); + update_vlan_tailroom_need_count(sdata, 1); if (!sdata->crypto_tx_tailroom_needed_cnt++) { @@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { + assert_key_lock(sdata->local); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); update_vlan_tailroom_need_count(sdata, -delta); From 468479e6043c84f5a65299cc07cb08a22a28c2b1 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 17 Jun 2015 15:59:34 -0400 Subject: [PATCH 09/18] packet: avoid out of bounds read in round robin fanout PACKET_FANOUT_LB computes f->rr_cur such that it is modulo f->num_members. It returns the old value unconditionally, but f->num_members may have changed since the last store. Ensure that the return value is always < num. When modifying the logic, simplify it further by replacing the loop with an unconditional atomic increment. Fixes: dc99f600698d ("packet: Add fanout support.") Suggested-by: Eric Dumazet Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 131545a06f05..fe1610ddeacf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1272,16 +1272,6 @@ static void packet_sock_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static int fanout_rr_next(struct packet_fanout *f, unsigned int num) -{ - int x = atomic_read(&f->rr_cur) + 1; - - if (x >= num) - x = 0; - - return x; -} - static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) @@ -1293,13 +1283,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - int cur, old; + unsigned int val = atomic_inc_return(&f->rr_cur); - cur = atomic_read(&f->rr_cur); - while ((old = atomic_cmpxchg(&f->rr_cur, cur, - fanout_rr_next(f, num))) != cur) - cur = old; - return cur; + return val % num; } static unsigned int fanout_demux_cpu(struct packet_fanout *f, From 538761b794c1542f1c6e31eadd9d7aae118889f7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 18 Jun 2015 18:36:03 +0300 Subject: [PATCH 10/18] mvneta: add forgotten initialization of autonegotiation bits The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state signaling") changed mvneta_adjust_link() so that it does not clear the auto-negotiation bits in MVNETA_GMAC_AUTONEG_CONFIG register. This was necessary for auto-negotiation mode to work. Unfortunately I haven't checked if these bits are ever initialized. It appears they are not. This patch adds the missing initialization of the auto-negotiation bits in the MVNETA_GMAC_AUTONEG_CONFIG register. It fixes the following regression: https://www.mail-archive.com/netdev@vger.kernel.org/msg67928.html Since the patch was tested to fix a regression, it should be applied to stable tree. Tested-by: Arnaud Ebalard CC: Thomas Petazzoni CC: Florian Fainelli CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Stas Sergeev Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ce5f7f9cff06..74176ec4f39d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1013,6 +1013,12 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } mvneta_set_ucast_table(pp, -1); From dfea2aa654243f70dc53b8648d0bbdeec55a7df1 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 18 Jun 2015 09:15:34 -0700 Subject: [PATCH 11/18] tcp: Do not call tcp_fastopen_reset_cipher from interrupt context tcp_fastopen_reset_cipher really cannot be called from interrupt context. It allocates the tcp_fastopen_context with GFP_KERNEL and calls crypto_alloc_cipher, which allocates all kind of stuff with GFP_KERNEL. 
Thus, we might sleep when the key-generation is triggered by an incoming TFO cookie-request which would then happen in interrupt- context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP: [ 36.001813] BUG: sleeping function called from invalid context at mm/slub.c:1266 [ 36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill [ 36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14 [ 36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 36.008250] 00000000000004f2 ffff88007f8838a8 ffffffff8171d53a ffff880075a084a8 [ 36.009630] ffff880075a08000 ffff88007f8838c8 ffffffff810967d3 ffff88007f883928 [ 36.011076] 0000000000000000 ffff88007f8838f8 ffffffff81096892 ffff88007f89be00 [ 36.012494] Call Trace: [ 36.012953] [] dump_stack+0x4f/0x6d [ 36.014085] [] ___might_sleep+0x103/0x170 [ 36.015117] [] __might_sleep+0x52/0x90 [ 36.016117] [] kmem_cache_alloc_trace+0x47/0x190 [ 36.017266] [] ? tcp_fastopen_reset_cipher+0x42/0x130 [ 36.018485] [] tcp_fastopen_reset_cipher+0x42/0x130 [ 36.019679] [] tcp_fastopen_init_key_once+0x61/0x70 [ 36.020884] [] __tcp_fastopen_cookie_gen+0x1c/0x60 [ 36.022058] [] tcp_try_fastopen+0x58f/0x730 [ 36.023118] [] tcp_conn_request+0x3e8/0x7b0 [ 36.024185] [] ? __module_text_address+0x12/0x60 [ 36.025327] [] tcp_v4_conn_request+0x51/0x60 [ 36.026410] [] tcp_rcv_state_process+0x190/0xda0 [ 36.027556] [] ? __inet_lookup_established+0x47/0x170 [ 36.028784] [] tcp_v4_do_rcv+0x16d/0x3d0 [ 36.029832] [] ? security_sock_rcv_skb+0x16/0x20 [ 36.030936] [] tcp_v4_rcv+0x77a/0x7b0 [ 36.031875] [] ? iptable_filter_hook+0x33/0x70 [ 36.032953] [] ip_local_deliver_finish+0x92/0x1f0 [ 36.034065] [] ip_local_deliver+0x9a/0xb0 [ 36.035069] [] ? ip_rcv+0x3d0/0x3d0 [ 36.035963] [] ip_rcv_finish+0x119/0x330 [ 36.036950] [] ip_rcv+0x2e7/0x3d0 [ 36.037847] [] __netif_receive_skb_core+0x552/0x930 [ 36.038994] [] __netif_receive_skb+0x27/0x70 [ 36.040033] [] process_backlog+0xd2/0x1f0 [ 36.041025] [] net_rx_action+0x122/0x310 [ 36.042007] [] __do_softirq+0x103/0x2f0 [ 36.042978] [] do_softirq_own_stack+0x1c/0x30 This patch moves the call to tcp_fastopen_init_key_once to the places where a listener socket creates its TFO-state, which always happens in user-context (either from the setsockopt, or implicitly during the listen()-call) Cc: Eric Dumazet Cc: Hannes Frederic Sowa Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once") Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_fastopen.c | 2 -- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d04..a5aa54ea6533 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int backlog) err = 0; if (err) goto out; + + tcp_fastopen_init_key_once(true); } err = inet_csk_listen_start(sk, backlog); if (err) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1377f2a0472..bb2ce74f6004 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2545,10 +2545,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | - TCPF_LISTEN))) + TCPF_LISTEN))) { + tcp_fastopen_init_key_once(true); + err = fastopen_init_queue(sk, val); - else + } else { err = -EINVAL; + } break; case TCP_TIMESTAMP: if (!tp->repair) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 46b087a27503..f9c0fb84e435 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -78,8 +78,6 @@ static bool __tcp_fastopen_cookie_gen(const void *path, struct tcp_fastopen_context *ctx; bool ok = false; - tcp_fastopen_init_key_once(true); - rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { From d496f7842aada20c61e6044b3395383fa972872c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 19 Jun 2015 00:46:53 +0200 Subject: [PATCH 12/18] NET: ROSE: Don't dereference NULL neighbour pointer. A ROSE socket doesn't necessarily always have a neighbour pointer so check if the neighbour pointer is valid before dereferencing it. Signed-off-by: Ralf Baechle Tested-by: Bernard Pidoux Cc: stable@vger.kernel.org #2.6.11+ Signed-off-by: David S. Miller --- net/rose/af_rose.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8ae603069a1a..dd304bc40788 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -192,7 +192,8 @@ static void rose_kill_by_device(struct net_device *dev) if (rose->device == dev) { rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); - rose->neighbour->use--; + if (rose->neighbour) + rose->neighbour->use--; rose->device = NULL; } } From 754bc547f0a79f7568b5b81c7fc0a8d044a6571a Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Fri, 19 Jun 2015 01:22:57 -0700 Subject: [PATCH 13/18] bridge: multicast: restore router configuration on port link down/up When a port goes through a link down/up the multicast router configuration is not restored. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries") Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ff667e18b2d6..761fc733bf6d 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -37,6 +37,8 @@ static void br_multicast_start_querier(struct net_bridge *br, struct bridge_mcast_own_query *query); +static void br_multicast_add_router(struct net_bridge *br, + struct net_bridge_port *port); unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -936,6 +938,8 @@ void br_multicast_enable_port(struct net_bridge_port *port) #if IS_ENABLED(CONFIG_IPV6) br_multicast_enable(&port->ip6_own_query); #endif + if (port->multicast_router == 2 && hlist_unhashed(&port->rlist)) + br_multicast_add_router(br, port); out: spin_unlock(&br->multicast_lock); From 12b322ac85208de564ecf23aa754d796a91de21f Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Fri, 19 Jun 2015 14:21:51 +0200 Subject: [PATCH 14/18] xen-netback: fix a BUG() during initialization Commit edafc132baac ("xen-netback: making the bandwidth limiter runtime settable") introduced the capability to change the bandwidth rate limit at runtime. But it also introduced a possible crashing bug. If netback receives two XenbusStateConnected without getting the hotplug-status watch firing in between, then it will try to register the watches for the rate limiter again. But this triggers a BUG() in the watch registration code. The fix modifies connect() to remove the possibly existing packet-rate watches before trying to install those watches. This behaviour is in line with how connect() deals with the hotplug-status watch. Signed-off-by: Imre Palik Cc: Matt Wilson Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 968787abf78d..ec383b0f5443 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -681,6 +681,9 @@ static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) char *node; unsigned maxlen = strlen(dev->nodename) + sizeof("/rate"); + if (vif->credit_watch.node) + return -EADDRINUSE; + node = kmalloc(maxlen, GFP_KERNEL); if (!node) return -ENOMEM; @@ -770,6 +773,7 @@ static void connect(struct backend_info *be) } xen_net_read_rate(dev, &credit_bytes, &credit_usec); + xen_unregister_watchers(be->vif); xen_register_watchers(dev, be->vif); read_xenbus_vif_flags(be); From 26c0a14f34fdcdcc82ac278a511fd86597e608e3 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Sun, 21 Jun 2015 01:32:46 +0300 Subject: [PATCH 15/18] net: fs_enet: Fix NETIF_F_SG feature for Freescale MPC5121 Commit 4fc9b87bae25 ("net: fs_enet: Implement NETIF_F_SG feature") brings a trouble to Freescale MPC512x: a kernel oops happens during sending non-linear sk_buff with .data not aligned by 4. 
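The fix, explained after the log excerpt that follows, only hands the buffer chain to the hardware when skb->data and every fragment offset are 4-byte aligned, and linearizes the skb otherwise. A minimal userspace sketch of that alignment check (struct and helper names invented for illustration, not the driver code):

#include <stdint.h>
#include <stdio.h>

#define ALIGNED4(x) (((uintptr_t)(x) & 3) == 0)

struct buf_chain {
	unsigned char *data;	/* head data, like skb->data */
	size_t frag_off[4];	/* fragment offsets, like frag->page_offset */
	int nr_frags;
};

static int chain_is_aligned(const struct buf_chain *c)
{
	int i;

	if (!ALIGNED4(c->data))
		return 0;
	for (i = 0; i < c->nr_frags; i++)
		if (!ALIGNED4(c->frag_off[i]))
			return 0;
	return 1;
}

int main(void)
{
	unsigned char storage[64];
	struct buf_chain c = { .data = storage + 1, .nr_frags = 1, .frag_off = { 2 } };

	/* In the driver this is where skb_linearize() and the copy workaround run. */
	printf("aligned: %d -> must linearize: %d\n",
	       chain_is_aligned(&c), !chain_is_aligned(&c));
	return 0;
}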
Log quotation: Unable to handle kernel paging request for data at address 0xe467c000 Faulting instruction address: 0xc000cd44 Oops: Kernel access of bad area, sig: 11 [#1] MPC512x generic Modules linked in: CPU: 0 PID: 984 Comm: kworker/0:1H Not tainted 4.1.0-rc8-00024-gbb16140 #2 Workqueue: rpciod rpc_async_schedule task: cf364a50 ti: cf362000 task.ti: cf362000 NIP: c000cd44 LR: c000c720 CTR: 00000206 REGS: cf363ac0 TRAP: 0300 Not tainted (4.1.0-rc8-00024-gbb16140) MSR: 00009032 CR: 42004082 XER: 00000000 DAR: e467c000 DSISR: 20000000 GPR00: c0279e24 cf363b70 cf364a50 e467c000 00000206 0000001f 00000001 00000001 GPR08: 00000000 e467c000 e46800be 000139a6 82002082 00000000 c002e46c cf3c3680 GPR16: c044cb30 c04b0000 cf363c48 00000000 00000001 fde0315c 00000000 0000000b GPR24: 0000002c 000040be cf339aa0 0000000b 00000001 cf873210 00282f85 00000000 NIP [c000cd44] clean_dcache_range+0x1c/0x30 LR [c000c720] dma_direct_map_page+0x40/0x94 Call Trace: [cf363b70] [cf339b60] 0xcf339b60 (unreliable) [cf363b90] [c0279e24] fs_enet_start_xmit+0x1c8/0x42c [cf363bd0] [c02ff710] dev_hard_start_xmit+0x2dc/0x3d4 [cf363c40] [c0319c60] sch_direct_xmit+0xcc/0x1cc [cf363c70] [c02ff9c0] __dev_queue_xmit+0x1b8/0x47c [cf363ca0] [c032a3e8] ip_finish_output+0x1fc/0x9a8 [cf363ce0] [c032c31c] ip_send_skb+0x1c/0xa4 [cf363cf0] [c035112c] udp_send_skb+0xe4/0x2e8 [cf363d10] [c0351368] udp_push_pending_frames+0x38/0x84 [cf363d20] [c03537b8] udp_sendpage+0x134/0x174 [cf363d70] [c0384fd4] xs_sendpages+0x21c/0x250 [cf363db0] [c03852bc] xs_udp_send_request+0x50/0xf8 [cf363de0] [c0382f08] xprt_transmit+0x64/0x280 [cf363e20] [c038017c] call_transmit+0x168/0x234 [cf363e40] [c0387918] __rpc_execute+0x88/0x2b0 [cf363e80] [c00296f8] process_one_work+0x124/0x2fc [cf363ea0] [c0029a00] worker_thread+0x130/0x480 [cf363ef0] [c002e528] kthread+0xbc/0xd0 [cf363f40] [c000e4a8] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 7c70faa6 60630800 7c70fba6 4c00012c 4e800020 38a0001f 7c632878 7c832050 7c842a14 5484d97f 4d820020 7c8903a6 <7c00186c> 38630020 4200fff8 7c0004ac ---[ end trace c846c1eceb513c85 ]--- The reason: MPC5121 FEC requires 4-byte alignment for TX data buffer and calls tx_skb_align_workaround() for copying sk_buff with not aligned .data to a new sk_buff with aligned one. But tx_skb_align_workaround() uses skb_copy_from_linear_data() which doesn't work for non-linear sk_buff: a new sk_buff has non-zero nr_frags and zero .data_len. So improve the condition of calling tx_skb_align_workaround() and use skb_linearize() in it. Signed-off-by: Alexander Popov Signed-off-by: David S. 
Miller --- .../ethernet/freescale/fs_enet/fs_enet-main.c | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 9b3639eae676..d92802bccaf4 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -490,6 +490,9 @@ static struct sk_buff *tx_skb_align_workaround(struct net_device *dev, { struct sk_buff *new_skb; + if (skb_linearize(skb)) + return NULL; + /* Alloc new skb */ new_skb = netdev_alloc_skb(dev, skb->len + 4); if (!new_skb) @@ -515,12 +518,27 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) cbd_t __iomem *bdp; int curidx; u16 sc; - int nr_frags = skb_shinfo(skb)->nr_frags; + int nr_frags; skb_frag_t *frag; int len; - #ifdef CONFIG_FS_ENET_MPC5121_FEC - if (((unsigned long)skb->data) & 0x3) { + int is_aligned = 1; + int i; + + if (!IS_ALIGNED((unsigned long)skb->data, 4)) { + is_aligned = 0; + } else { + nr_frags = skb_shinfo(skb)->nr_frags; + frag = skb_shinfo(skb)->frags; + for (i = 0; i < nr_frags; i++, frag++) { + if (!IS_ALIGNED(frag->page_offset, 4)) { + is_aligned = 0; + break; + } + } + } + + if (!is_aligned) { skb = tx_skb_align_workaround(dev, skb); if (!skb) { /* @@ -532,6 +550,7 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) } } #endif + spin_lock(&fep->tx_lock); /* @@ -539,6 +558,7 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) */ bdp = fep->cur_tx; + nr_frags = skb_shinfo(skb)->nr_frags; if (fep->tx_free <= nr_frags || (CBDR_SC(bdp) & BD_ENET_TX_READY)) { netif_stop_queue(dev); spin_unlock(&fep->tx_lock); From a076e6bfe7fe92a3825b3d1448e6f988fd033722 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Tue, 23 Jun 2015 10:52:10 +0300 Subject: [PATCH 16/18] rocker: call correct unregister function on error Use the correct unregister function matching the register function on the error path. Signed-off-by: Gilad Ben-Yossef Fixes: c1beeef7a32a791a ("rocker: implement IPv4 fib offloading") Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cf98cc9bbc8d..3479573a6d78 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -5040,7 +5040,7 @@ static int __init rocker_module_init(void) return 0; err_pci_register_driver: - unregister_netdevice_notifier(&rocker_netevent_nb); + unregister_netevent_notifier(&rocker_netevent_nb); unregister_netdevice_notifier(&rocker_netdevice_nb); return err; } From 34b99df4e6256ddafb663c6de0711dceceddfe0e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 23 Jun 2015 08:34:39 +0300 Subject: [PATCH 17/18] ip: report the original address of ICMP messages ICMP messages can trigger ICMP and local errors. In this case serr->port is 0 and starting from Linux 4.0 we do not return the original target address to the error queue readers. Add function to define which errors provide addr_offset. With this fix my ping command is not silent anymore. Fixes: c247f0534cc5 ("ip: fix error queue empty skb handling") Signed-off-by: Julian Anastasov Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/ipv4/ip_sockglue.c | 11 ++++++++++- net/ipv6/datagram.c | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cfb0893f263..6ddde89996f4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -432,6 +432,15 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv4 supports cmsg on all imcp errors and some timestamps * * Timestamp code paths do not initialize the fields expected by cmsg: @@ -498,7 +507,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv4_datagram_support_addr(serr)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 762a58c772b8..62d908e64eeb 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. * * At one point, excluding local errors was a quick test to identify icmp/icmp6 @@ -389,7 +399,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv6_datagram_support_addr(serr)) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; From f1590670ce069eefeb93916391a67643e6ad1630 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 24 Jun 2015 11:47:41 +0300 Subject: [PATCH 18/18] stmmac: troubleshoot unexpected bits in des0 & des1 Current implementation of descriptor init procedure only takes care about setting/clearing ownership flag in "des0"/"des1" fields while it is perfectly possible to get unexpected bits set because of the following factors: [1] On driver probe underlying memory allocated with dma_alloc_coherent() might not be zeroed and so it will be filled with garbage. [2] During driver operation some bits could be set by SD/MMC controller (for example error flags etc). And unexpected and/or randomly set flags in "des0"/"des1" fields may lead to unpredictable behavior of GMAC DMA block. This change addresses both items above with: [1] Use of dma_zalloc_coherent() instead of simple dma_alloc_coherent() to make sure allocated memory is zeroed. That shouldn't affect performance because this allocation only happens once on driver probe. [2] Do explicit zeroing of both "des0" and "des1" fields of all buffer descriptors during initialization of DMA transfer. And while at it fixed identation of dma_free_coherent() counterpart as well. 
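A minimal userspace sketch of the two-part idea (calloc() standing in for dma_zalloc_coherent(); descriptor layout and names invented for illustration): allocate the ring already zeroed, and still clear the whole flag word when a descriptor is (re)initialised, so stale status or error bits can never reach the DMA engine.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct desc {
	uint32_t flags;		/* stands in for des0/des1: OWN bit, sizes, error flags */
	uint32_t buf_addr;
};

#define DESC_OWN (1u << 31)

static void desc_init_rx(struct desc *d, uint32_t buf, uint32_t size)
{
	d->flags = 0;		/* explicit clear: don't trust whatever was there */
	d->buf_addr = buf;
	d->flags = DESC_OWN | (size & 0x1fff);
}

int main(void)
{
	/* calloc() plays the role of dma_zalloc_coherent(): the ring starts out all-zero. */
	struct desc *ring = calloc(16, sizeof(*ring));

	if (!ring)
		return 1;
	ring[0].flags = 0xdeadbeef;	/* pretend the controller left status bits behind */
	desc_init_rx(&ring[0], 0x1000, 2047);
	printf("desc0 flags after re-init: 0x%08x\n", (unsigned)ring[0].flags);
	free(ring);
	return 0;
}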
Signed-off-by: Alexey Brodkin Cc: Giuseppe Cavallaro Cc: arc-linux-dev@synopsys.com Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Cc: David Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/descs.h | 2 + .../net/ethernet/stmicro/stmmac/enh_desc.c | 3 +- .../net/ethernet/stmicro/stmmac/norm_desc.c | 3 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 44 +++++++++---------- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index ad3996038018..799c2929c536 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -158,6 +158,8 @@ struct dma_desc { u32 buffer2_size:13; u32 reserved4:3; } etx; /* -- enhanced -- */ + + u64 all_flags; } des01; unsigned int des2; unsigned int des3; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 1e2bcf5f89e1..7d944449f5ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -240,6 +240,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.erx.own = 1; p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; @@ -254,7 +255,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.etx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ehn_desc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 35ad4f427ae2..48c3456445b2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -123,6 +123,7 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.rx.own = 1; p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; @@ -137,7 +138,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.tx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ndesc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2c5ce2baca87..2e299e0949d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1189,41 +1189,41 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv) goto err_tx_skbuff; if (priv->extend_desc) { - priv->dma_erx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct - dma_extended_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_erx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct + dma_extended_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_erx) goto err_dma; - priv->dma_etx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct - dma_extended_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_etx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct + dma_extended_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_etx) { 
dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_extended_desc), - priv->dma_erx, priv->dma_rx_phy); + sizeof(struct dma_extended_desc), + priv->dma_erx, priv->dma_rx_phy); goto err_dma; } } else { - priv->dma_rx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct dma_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_rx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct dma_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_rx) goto err_dma; - priv->dma_tx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct dma_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_tx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct dma_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_tx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_desc), - priv->dma_rx, priv->dma_rx_phy); + sizeof(struct dma_desc), + priv->dma_rx, priv->dma_rx_phy); goto err_dma; } }
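As a closing illustration of the descs.h hunk above: adding a wide member to the des01 union lets the init helpers clear every descriptor bitfield with a single store. A standalone sketch, simplified to one 32-bit word with invented names; exact bitfield layout is implementation-defined, but the wide union member always spans the same storage as the bitfields:

#include <stdio.h>
#include <stdint.h>

union desc_word {
	struct {
		unsigned int own:1;
		unsigned int error:1;
		unsigned int size:13;
		unsigned int reserved:17;
	} rx;
	uint32_t all_flags;	/* spans the same 32 bits as the bitfields above */
};

int main(void)
{
	union desc_word w;

	w.rx.own = 1;
	w.rx.error = 1;
	w.rx.size = 0x1fff;
	w.all_flags = 0;	/* one store clears own, error, size and reserved together */
	printf("own=%u error=%u size=%u\n", w.rx.own, w.rx.error, w.rx.size);
	return 0;
}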